[llvm] 94e69fb - [RISCV] Add DAG combine to fold (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 20 11:38:40 PST 2022


Author: Craig Topper
Date: 2022-01-20T11:35:37-08:00
New Revision: 94e69fbb4f3a9719d4d8cc7268dd5db5d0be7e8f

URL: https://github.com/llvm/llvm-project/commit/94e69fbb4f3a9719d4d8cc7268dd5db5d0be7e8f
DIFF: https://github.com/llvm/llvm-project/commit/94e69fbb4f3a9719d4d8cc7268dd5db5d0be7e8f.diff

LOG: [RISCV] Add DAG combine to fold (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))

Similarly for ceil, trunc, round, and roundeven. This allows us to use
static rounding modes and avoid a libcall for the rounding operation.

This is similar to D116771, but for the saturating conversions.

This optimization is done for AArch64 with isel patterns. RISC-V
doesn't have dedicated instructions for ceil/floor/trunc/round/roundeven,
so those operations don't survive until isel where a pattern could
match. Thus I've implemented this as a DAG combine instead.

I'm only handling saturating conversions to i64 or i32. This could be
extended to other sizes in the future.

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D116864
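
For reference, a minimal IR example of the shape this combine targets
(adapted from the new tests; the function name is just illustrative,
and the added files below cover all five rounding ops for f32/f64 with
i32/i64 results):

  declare double @llvm.floor.f64(double)
  declare i64 @llvm.fptosi.sat.i64.f64(double)

  define i64 @floor_to_i64_sat(double %x) {
    %a = call double @llvm.floor.f64(double %x)
    %b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
    ret i64 %b
  }

On rv64 with D, this now selects to a feq.d NaN check that picks 0,
plus a single fcvt.l.d using the rdn rounding mode, rather than going
through a floor libcall (see test_floor_si64 in
double-round-conv-sat.ll below).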

Added: 
    llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
    llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
    llvm/test/CodeGen/RISCV/half-round-conv-sat.ll

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 304c05d9378f2..f942f395d5328 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1051,6 +1051,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setTargetDAGCombine(ISD::ZERO_EXTEND);
     setTargetDAGCombine(ISD::FP_TO_SINT);
     setTargetDAGCombine(ISD::FP_TO_UINT);
+    setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
+    setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
   }
   if (Subtarget.hasVInstructions()) {
     setTargetDAGCombine(ISD::FCOPYSIGN);
@@ -7180,13 +7182,24 @@ static SDValue combineMUL_VLToVWMUL(SDNode *N, SDValue Op0, SDValue Op1,
   return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Mask, VL);
 }
 
+static RISCVFPRndMode::RoundingMode matchRoundingOp(SDValue Op) {
+  switch (Op.getOpcode()) {
+  case ISD::FROUNDEVEN: return RISCVFPRndMode::RNE;
+  case ISD::FTRUNC:     return RISCVFPRndMode::RTZ;
+  case ISD::FFLOOR:     return RISCVFPRndMode::RDN;
+  case ISD::FCEIL:      return RISCVFPRndMode::RUP;
+  case ISD::FROUND:     return RISCVFPRndMode::RMM;
+  }
+
+  return RISCVFPRndMode::Invalid;
+}
+
 // Fold
 //   (fp_to_int (froundeven X)) -> fcvt X, rne
 //   (fp_to_int (ftrunc X))     -> fcvt X, rtz
 //   (fp_to_int (ffloor X))     -> fcvt X, rdn
 //   (fp_to_int (fceil X))      -> fcvt X, rup
 //   (fp_to_int (fround X))     -> fcvt X, rmm
-// FIXME: We should also do this for fp_to_int_sat.
 static SDValue performFP_TO_INTCombine(SDNode *N,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const RISCVSubtarget &Subtarget) {
@@ -7210,16 +7223,9 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
   if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
     return SDValue();
 
-  RISCVFPRndMode::RoundingMode FRM;
-  switch (Src->getOpcode()) {
-  default:
+  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
+  if (FRM == RISCVFPRndMode::Invalid)
     return SDValue();
-  case ISD::FROUNDEVEN: FRM = RISCVFPRndMode::RNE; break;
-  case ISD::FTRUNC:     FRM = RISCVFPRndMode::RTZ; break;
-  case ISD::FFLOOR:     FRM = RISCVFPRndMode::RDN; break;
-  case ISD::FCEIL:      FRM = RISCVFPRndMode::RUP; break;
-  case ISD::FROUND:     FRM = RISCVFPRndMode::RMM; break;
-  }
 
   bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
 
@@ -7235,6 +7241,64 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
   return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
 }
 
+// Fold
+//   (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
+//   (fp_to_int_sat (ftrunc X))     -> (select X == nan, 0, (fcvt X, rtz))
+//   (fp_to_int_sat (ffloor X))     -> (select X == nan, 0, (fcvt X, rdn))
+//   (fp_to_int_sat (fceil X))      -> (select X == nan, 0, (fcvt X, rup))
+//   (fp_to_int_sat (fround X))     -> (select X == nan, 0, (fcvt X, rmm))
+static SDValue performFP_TO_INT_SATCombine(SDNode *N,
+                                       TargetLowering::DAGCombinerInfo &DCI,
+                                       const RISCVSubtarget &Subtarget) {
+  SelectionDAG &DAG = DCI.DAG;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  // Only handle XLen types. Other types narrower than XLen will eventually be
+  // legalized to XLenVT.
+  EVT DstVT = N->getValueType(0);
+  if (DstVT != XLenVT)
+    return SDValue();
+
+  SDValue Src = N->getOperand(0);
+
+  // Ensure the FP type is also legal.
+  if (!TLI.isTypeLegal(Src.getValueType()))
+    return SDValue();
+
+  // Don't do this for f16 with Zfhmin and not Zfh.
+  if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
+    return SDValue();
+
+  EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
+  if (FRM == RISCVFPRndMode::Invalid)
+    return SDValue();
+
+  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
+
+  unsigned Opc;
+  if (SatVT == DstVT)
+    Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
+  else if (DstVT == MVT::i64 && SatVT == MVT::i32)
+    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
+  else
+    return SDValue();
+  // FIXME: Support other SatVTs by clamping before or after the conversion.
+
+  Src = Src.getOperand(0);
+
+  SDLoc DL(N);
+  SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
+                                DAG.getTargetConstant(FRM, DL, XLenVT));
+
+  // RISCV FP-to-int conversions saturate to the destination register size, but
+  // don't produce 0 for nan.
+  SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
+  return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
+}
+
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -7548,6 +7612,9 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
     return performFP_TO_INTCombine(N, DCI, Subtarget);
+  case ISD::FP_TO_SINT_SAT:
+  case ISD::FP_TO_UINT_SAT:
+    return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
   case ISD::FCOPYSIGN: {
     EVT VT = N->getValueType(0);
     if (!VT.isVector())

diff --git a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
new file mode 100644
index 0000000000000..38d82f6e46ff2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
@@ -0,0 +1,940 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \
+; RUN:   -target-abi=ilp32d | FileCheck -check-prefix=RV32IFD %s
+; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \
+; RUN:   -target-abi=lp64d | FileCheck -check-prefix=RV64IFD %s
+
+define signext i32 @test_floor_si32(double %x) {
+; RV32IFD-LABEL: test_floor_si32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB0_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB0_2:
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rdn
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_floor_si32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB0_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB0_2:
+; RV64IFD-NEXT:    fcvt.w.d a0, fa0, rdn
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.floor.f64(double %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_floor_si64(double %x) nounwind {
+; RV32IFD-LABEL: test_floor_si64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call floor@plt
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI1_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI1_0)(a0)
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixdfdi@plt
+; RV32IFD-NEXT:    mv a2, a0
+; RV32IFD-NEXT:    bnez s0, .LBB1_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a2, 0
+; RV32IFD-NEXT:  .LBB1_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI1_1)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI1_1)(a0)
+; RV32IFD-NEXT:    flt.d a3, ft0, fs0
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a3, .LBB1_9
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    beqz a2, .LBB1_10
+; RV32IFD-NEXT:  .LBB1_4:
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB1_11
+; RV32IFD-NEXT:  .LBB1_5:
+; RV32IFD-NEXT:    bnez a3, .LBB1_12
+; RV32IFD-NEXT:  .LBB1_6:
+; RV32IFD-NEXT:    bnez a2, .LBB1_8
+; RV32IFD-NEXT:  .LBB1_7:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:  .LBB1_8:
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB1_9:
+; RV32IFD-NEXT:    mv a0, a2
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    bnez a2, .LBB1_4
+; RV32IFD-NEXT:  .LBB1_10:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    bnez s0, .LBB1_5
+; RV32IFD-NEXT:  .LBB1_11:
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    beqz a3, .LBB1_6
+; RV32IFD-NEXT:  .LBB1_12:
+; RV32IFD-NEXT:    addi a1, a4, -1
+; RV32IFD-NEXT:    beqz a2, .LBB1_7
+; RV32IFD-NEXT:    j .LBB1_8
+;
+; RV64IFD-LABEL: test_floor_si64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB1_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB1_2:
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rdn
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.floor.f64(double %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_floor_ui32(double %x) {
+; RV32IFD-LABEL: test_floor_ui32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB2_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB2_2:
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rdn
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_floor_ui32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB2_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB2_2:
+; RV64IFD-NEXT:    fcvt.wu.d a0, fa0, rdn
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.floor.f64(double %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_floor_ui64(double %x) nounwind {
+; RV32IFD-LABEL: test_floor_ui64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call floor@plt
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fcvt.d.w ft0, zero
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixunsdfdi@plt
+; RV32IFD-NEXT:    mv a3, a0
+; RV32IFD-NEXT:    bnez s0, .LBB3_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a3, 0
+; RV32IFD-NEXT:  .LBB3_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI3_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI3_0)(a0)
+; RV32IFD-NEXT:    flt.d a4, ft0, fs0
+; RV32IFD-NEXT:    li a2, -1
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a4, .LBB3_7
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    beqz s0, .LBB3_8
+; RV32IFD-NEXT:  .LBB3_4:
+; RV32IFD-NEXT:    bnez a4, .LBB3_6
+; RV32IFD-NEXT:  .LBB3_5:
+; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:  .LBB3_6:
+; RV32IFD-NEXT:    mv a1, a2
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB3_7:
+; RV32IFD-NEXT:    mv a0, a3
+; RV32IFD-NEXT:    bnez s0, .LBB3_4
+; RV32IFD-NEXT:  .LBB3_8:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    beqz a4, .LBB3_5
+; RV32IFD-NEXT:    j .LBB3_6
+;
+; RV64IFD-LABEL: test_floor_ui64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB3_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB3_2:
+; RV64IFD-NEXT:    fcvt.lu.d a0, fa0, rdn
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.floor.f64(double %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_ceil_si32(double %x) {
+; RV32IFD-LABEL: test_ceil_si32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB4_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB4_2:
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rup
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_ceil_si32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB4_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB4_2:
+; RV64IFD-NEXT:    fcvt.w.d a0, fa0, rup
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.ceil.f64(double %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_ceil_si64(double %x) nounwind {
+; RV32IFD-LABEL: test_ceil_si64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call ceil@plt
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI5_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI5_0)(a0)
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixdfdi@plt
+; RV32IFD-NEXT:    mv a2, a0
+; RV32IFD-NEXT:    bnez s0, .LBB5_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a2, 0
+; RV32IFD-NEXT:  .LBB5_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI5_1)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI5_1)(a0)
+; RV32IFD-NEXT:    flt.d a3, ft0, fs0
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a3, .LBB5_9
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    beqz a2, .LBB5_10
+; RV32IFD-NEXT:  .LBB5_4:
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB5_11
+; RV32IFD-NEXT:  .LBB5_5:
+; RV32IFD-NEXT:    bnez a3, .LBB5_12
+; RV32IFD-NEXT:  .LBB5_6:
+; RV32IFD-NEXT:    bnez a2, .LBB5_8
+; RV32IFD-NEXT:  .LBB5_7:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:  .LBB5_8:
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB5_9:
+; RV32IFD-NEXT:    mv a0, a2
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    bnez a2, .LBB5_4
+; RV32IFD-NEXT:  .LBB5_10:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    bnez s0, .LBB5_5
+; RV32IFD-NEXT:  .LBB5_11:
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    beqz a3, .LBB5_6
+; RV32IFD-NEXT:  .LBB5_12:
+; RV32IFD-NEXT:    addi a1, a4, -1
+; RV32IFD-NEXT:    beqz a2, .LBB5_7
+; RV32IFD-NEXT:    j .LBB5_8
+;
+; RV64IFD-LABEL: test_ceil_si64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB5_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB5_2:
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rup
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.ceil.f64(double %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_ceil_ui32(double %x) {
+; RV32IFD-LABEL: test_ceil_ui32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB6_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB6_2:
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rup
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_ceil_ui32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB6_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB6_2:
+; RV64IFD-NEXT:    fcvt.wu.d a0, fa0, rup
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.ceil.f64(double %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_ceil_ui64(double %x) nounwind {
+; RV32IFD-LABEL: test_ceil_ui64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call ceil@plt
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fcvt.d.w ft0, zero
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixunsdfdi@plt
+; RV32IFD-NEXT:    mv a3, a0
+; RV32IFD-NEXT:    bnez s0, .LBB7_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a3, 0
+; RV32IFD-NEXT:  .LBB7_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI7_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI7_0)(a0)
+; RV32IFD-NEXT:    flt.d a4, ft0, fs0
+; RV32IFD-NEXT:    li a2, -1
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a4, .LBB7_7
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    beqz s0, .LBB7_8
+; RV32IFD-NEXT:  .LBB7_4:
+; RV32IFD-NEXT:    bnez a4, .LBB7_6
+; RV32IFD-NEXT:  .LBB7_5:
+; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:  .LBB7_6:
+; RV32IFD-NEXT:    mv a1, a2
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB7_7:
+; RV32IFD-NEXT:    mv a0, a3
+; RV32IFD-NEXT:    bnez s0, .LBB7_4
+; RV32IFD-NEXT:  .LBB7_8:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    beqz a4, .LBB7_5
+; RV32IFD-NEXT:    j .LBB7_6
+;
+; RV64IFD-LABEL: test_ceil_ui64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB7_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB7_2:
+; RV64IFD-NEXT:    fcvt.lu.d a0, fa0, rup
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.ceil.f64(double %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_trunc_si32(double %x) {
+; RV32IFD-LABEL: test_trunc_si32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB8_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB8_2:
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rtz
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_trunc_si32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB8_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB8_2:
+; RV64IFD-NEXT:    fcvt.w.d a0, fa0, rtz
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.trunc.f64(double %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_trunc_si64(double %x) nounwind {
+; RV32IFD-LABEL: test_trunc_si64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call trunc@plt
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI9_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI9_0)(a0)
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixdfdi@plt
+; RV32IFD-NEXT:    mv a2, a0
+; RV32IFD-NEXT:    bnez s0, .LBB9_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a2, 0
+; RV32IFD-NEXT:  .LBB9_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI9_1)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI9_1)(a0)
+; RV32IFD-NEXT:    flt.d a3, ft0, fs0
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a3, .LBB9_9
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    beqz a2, .LBB9_10
+; RV32IFD-NEXT:  .LBB9_4:
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB9_11
+; RV32IFD-NEXT:  .LBB9_5:
+; RV32IFD-NEXT:    bnez a3, .LBB9_12
+; RV32IFD-NEXT:  .LBB9_6:
+; RV32IFD-NEXT:    bnez a2, .LBB9_8
+; RV32IFD-NEXT:  .LBB9_7:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:  .LBB9_8:
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB9_9:
+; RV32IFD-NEXT:    mv a0, a2
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    bnez a2, .LBB9_4
+; RV32IFD-NEXT:  .LBB9_10:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    bnez s0, .LBB9_5
+; RV32IFD-NEXT:  .LBB9_11:
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    beqz a3, .LBB9_6
+; RV32IFD-NEXT:  .LBB9_12:
+; RV32IFD-NEXT:    addi a1, a4, -1
+; RV32IFD-NEXT:    beqz a2, .LBB9_7
+; RV32IFD-NEXT:    j .LBB9_8
+;
+; RV64IFD-LABEL: test_trunc_si64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB9_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB9_2:
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rtz
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.trunc.f64(double %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_trunc_ui32(double %x) {
+; RV32IFD-LABEL: test_trunc_ui32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB10_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB10_2:
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rtz
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_trunc_ui32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB10_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB10_2:
+; RV64IFD-NEXT:    fcvt.wu.d a0, fa0, rtz
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.trunc.f64(double %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_trunc_ui64(double %x) nounwind {
+; RV32IFD-LABEL: test_trunc_ui64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call trunc@plt
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fcvt.d.w ft0, zero
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixunsdfdi@plt
+; RV32IFD-NEXT:    mv a3, a0
+; RV32IFD-NEXT:    bnez s0, .LBB11_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a3, 0
+; RV32IFD-NEXT:  .LBB11_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI11_0)(a0)
+; RV32IFD-NEXT:    flt.d a4, ft0, fs0
+; RV32IFD-NEXT:    li a2, -1
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a4, .LBB11_7
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    beqz s0, .LBB11_8
+; RV32IFD-NEXT:  .LBB11_4:
+; RV32IFD-NEXT:    bnez a4, .LBB11_6
+; RV32IFD-NEXT:  .LBB11_5:
+; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:  .LBB11_6:
+; RV32IFD-NEXT:    mv a1, a2
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB11_7:
+; RV32IFD-NEXT:    mv a0, a3
+; RV32IFD-NEXT:    bnez s0, .LBB11_4
+; RV32IFD-NEXT:  .LBB11_8:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    beqz a4, .LBB11_5
+; RV32IFD-NEXT:    j .LBB11_6
+;
+; RV64IFD-LABEL: test_trunc_ui64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB11_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB11_2:
+; RV64IFD-NEXT:    fcvt.lu.d a0, fa0, rtz
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.trunc.f64(double %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_round_si32(double %x) {
+; RV32IFD-LABEL: test_round_si32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB12_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB12_2:
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rmm
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_round_si32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB12_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB12_2:
+; RV64IFD-NEXT:    fcvt.w.d a0, fa0, rmm
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.round.f64(double %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_round_si64(double %x) nounwind {
+; RV32IFD-LABEL: test_round_si64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call round@plt
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI13_0)(a0)
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixdfdi@plt
+; RV32IFD-NEXT:    mv a2, a0
+; RV32IFD-NEXT:    bnez s0, .LBB13_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a2, 0
+; RV32IFD-NEXT:  .LBB13_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI13_1)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI13_1)(a0)
+; RV32IFD-NEXT:    flt.d a3, ft0, fs0
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a3, .LBB13_9
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    beqz a2, .LBB13_10
+; RV32IFD-NEXT:  .LBB13_4:
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB13_11
+; RV32IFD-NEXT:  .LBB13_5:
+; RV32IFD-NEXT:    bnez a3, .LBB13_12
+; RV32IFD-NEXT:  .LBB13_6:
+; RV32IFD-NEXT:    bnez a2, .LBB13_8
+; RV32IFD-NEXT:  .LBB13_7:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:  .LBB13_8:
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB13_9:
+; RV32IFD-NEXT:    mv a0, a2
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    bnez a2, .LBB13_4
+; RV32IFD-NEXT:  .LBB13_10:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    bnez s0, .LBB13_5
+; RV32IFD-NEXT:  .LBB13_11:
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    beqz a3, .LBB13_6
+; RV32IFD-NEXT:  .LBB13_12:
+; RV32IFD-NEXT:    addi a1, a4, -1
+; RV32IFD-NEXT:    beqz a2, .LBB13_7
+; RV32IFD-NEXT:    j .LBB13_8
+;
+; RV64IFD-LABEL: test_round_si64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB13_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB13_2:
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rmm
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.round.f64(double %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_round_ui32(double %x) {
+; RV32IFD-LABEL: test_round_ui32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB14_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB14_2:
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rmm
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_round_ui32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB14_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB14_2:
+; RV64IFD-NEXT:    fcvt.wu.d a0, fa0, rmm
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.round.f64(double %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_round_ui64(double %x) nounwind {
+; RV32IFD-LABEL: test_round_ui64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call round@plt
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fcvt.d.w ft0, zero
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixunsdfdi@plt
+; RV32IFD-NEXT:    mv a3, a0
+; RV32IFD-NEXT:    bnez s0, .LBB15_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a3, 0
+; RV32IFD-NEXT:  .LBB15_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI15_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI15_0)(a0)
+; RV32IFD-NEXT:    flt.d a4, ft0, fs0
+; RV32IFD-NEXT:    li a2, -1
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a4, .LBB15_7
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    beqz s0, .LBB15_8
+; RV32IFD-NEXT:  .LBB15_4:
+; RV32IFD-NEXT:    bnez a4, .LBB15_6
+; RV32IFD-NEXT:  .LBB15_5:
+; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:  .LBB15_6:
+; RV32IFD-NEXT:    mv a1, a2
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB15_7:
+; RV32IFD-NEXT:    mv a0, a3
+; RV32IFD-NEXT:    bnez s0, .LBB15_4
+; RV32IFD-NEXT:  .LBB15_8:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    beqz a4, .LBB15_5
+; RV32IFD-NEXT:    j .LBB15_6
+;
+; RV64IFD-LABEL: test_round_ui64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB15_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB15_2:
+; RV64IFD-NEXT:    fcvt.lu.d a0, fa0, rmm
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.round.f64(double %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_roundeven_si32(double %x) {
+; RV32IFD-LABEL: test_roundeven_si32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB16_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB16_2:
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rne
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_roundeven_si32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB16_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB16_2:
+; RV64IFD-NEXT:    fcvt.w.d a0, fa0, rne
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.roundeven.f64(double %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_roundeven_si64(double %x) nounwind {
+; RV32IFD-LABEL: test_roundeven_si64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call roundeven@plt
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI17_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI17_0)(a0)
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixdfdi@plt
+; RV32IFD-NEXT:    mv a2, a0
+; RV32IFD-NEXT:    bnez s0, .LBB17_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a2, 0
+; RV32IFD-NEXT:  .LBB17_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI17_1)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI17_1)(a0)
+; RV32IFD-NEXT:    flt.d a3, ft0, fs0
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a3, .LBB17_9
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    beqz a2, .LBB17_10
+; RV32IFD-NEXT:  .LBB17_4:
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB17_11
+; RV32IFD-NEXT:  .LBB17_5:
+; RV32IFD-NEXT:    bnez a3, .LBB17_12
+; RV32IFD-NEXT:  .LBB17_6:
+; RV32IFD-NEXT:    bnez a2, .LBB17_8
+; RV32IFD-NEXT:  .LBB17_7:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:  .LBB17_8:
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB17_9:
+; RV32IFD-NEXT:    mv a0, a2
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    bnez a2, .LBB17_4
+; RV32IFD-NEXT:  .LBB17_10:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    bnez s0, .LBB17_5
+; RV32IFD-NEXT:  .LBB17_11:
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    beqz a3, .LBB17_6
+; RV32IFD-NEXT:  .LBB17_12:
+; RV32IFD-NEXT:    addi a1, a4, -1
+; RV32IFD-NEXT:    beqz a2, .LBB17_7
+; RV32IFD-NEXT:    j .LBB17_8
+;
+; RV64IFD-LABEL: test_roundeven_si64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB17_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB17_2:
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rne
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.roundeven.f64(double %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_roundeven_ui32(double %x) {
+; RV32IFD-LABEL: test_roundeven_ui32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB18_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB18_2:
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rne
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_roundeven_ui32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB18_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB18_2:
+; RV64IFD-NEXT:    fcvt.wu.d a0, fa0, rne
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.roundeven.f64(double %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_roundeven_ui64(double %x) nounwind {
+; RV32IFD-LABEL: test_roundeven_ui64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call roundeven@plt
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fcvt.d.w ft0, zero
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixunsdfdi@plt
+; RV32IFD-NEXT:    mv a3, a0
+; RV32IFD-NEXT:    bnez s0, .LBB19_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a3, 0
+; RV32IFD-NEXT:  .LBB19_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI19_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI19_0)(a0)
+; RV32IFD-NEXT:    flt.d a4, ft0, fs0
+; RV32IFD-NEXT:    li a2, -1
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a4, .LBB19_7
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    beqz s0, .LBB19_8
+; RV32IFD-NEXT:  .LBB19_4:
+; RV32IFD-NEXT:    bnez a4, .LBB19_6
+; RV32IFD-NEXT:  .LBB19_5:
+; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:  .LBB19_6:
+; RV32IFD-NEXT:    mv a1, a2
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB19_7:
+; RV32IFD-NEXT:    mv a0, a3
+; RV32IFD-NEXT:    bnez s0, .LBB19_4
+; RV32IFD-NEXT:  .LBB19_8:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    beqz a4, .LBB19_5
+; RV32IFD-NEXT:    j .LBB19_6
+;
+; RV64IFD-LABEL: test_roundeven_ui64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB19_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB19_2:
+; RV64IFD-NEXT:    fcvt.lu.d a0, fa0, rne
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.roundeven.f64(double %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+declare double @llvm.floor.f64(double)
+declare double @llvm.ceil.f64(double)
+declare double @llvm.trunc.f64(double)
+declare double @llvm.round.f64(double)
+declare double @llvm.roundeven.f64(double)
+declare i32 @llvm.fptosi.sat.i32.f64(double)
+declare i64 @llvm.fptosi.sat.i64.f64(double)
+declare i32 @llvm.fptoui.sat.i32.f64(double)
+declare i64 @llvm.fptoui.sat.i64.f64(double)

diff --git a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
new file mode 100644
index 0000000000000..9893b697af294
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
@@ -0,0 +1,940 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \
+; RUN:   -target-abi=ilp32f | FileCheck -check-prefix=RV32IF %s
+; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
+; RUN:   -target-abi=lp64f | FileCheck -check-prefix=RV64IF %s
+
+define signext i32 @test_floor_si32(float %x) {
+; RV32IF-LABEL: test_floor_si32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB0_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB0_2:
+; RV32IF-NEXT:    fcvt.w.s a0, fa0, rdn
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_floor_si32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB0_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB0_2:
+; RV64IF-NEXT:    fcvt.w.s a0, fa0, rdn
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.floor.f32(float %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_floor_si64(float %x) nounwind {
+; RV32IF-LABEL: test_floor_si64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call floorf@plt
+; RV32IF-NEXT:    lui a0, %hi(.LCPI1_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI1_0)(a0)
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixsfdi@plt
+; RV32IF-NEXT:    mv a2, a0
+; RV32IF-NEXT:    bnez s0, .LBB1_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a2, 0
+; RV32IF-NEXT:  .LBB1_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI1_1)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI1_1)(a0)
+; RV32IF-NEXT:    flt.s a3, ft0, fs0
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a3, .LBB1_9
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    beqz a2, .LBB1_10
+; RV32IF-NEXT:  .LBB1_4:
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    beqz s0, .LBB1_11
+; RV32IF-NEXT:  .LBB1_5:
+; RV32IF-NEXT:    bnez a3, .LBB1_12
+; RV32IF-NEXT:  .LBB1_6:
+; RV32IF-NEXT:    bnez a2, .LBB1_8
+; RV32IF-NEXT:  .LBB1_7:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:  .LBB1_8:
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB1_9:
+; RV32IF-NEXT:    mv a0, a2
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    bnez a2, .LBB1_4
+; RV32IF-NEXT:  .LBB1_10:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    bnez s0, .LBB1_5
+; RV32IF-NEXT:  .LBB1_11:
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    beqz a3, .LBB1_6
+; RV32IF-NEXT:  .LBB1_12:
+; RV32IF-NEXT:    addi a1, a4, -1
+; RV32IF-NEXT:    beqz a2, .LBB1_7
+; RV32IF-NEXT:    j .LBB1_8
+;
+; RV64IF-LABEL: test_floor_si64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB1_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB1_2:
+; RV64IF-NEXT:    fcvt.l.s a0, fa0, rdn
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.floor.f32(float %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_floor_ui32(float %x) {
+; RV32IF-LABEL: test_floor_ui32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB2_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB2_2:
+; RV32IF-NEXT:    fcvt.wu.s a0, fa0, rdn
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_floor_ui32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB2_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB2_2:
+; RV64IF-NEXT:    fcvt.wu.s a0, fa0, rdn
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.floor.f32(float %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_floor_ui64(float %x) nounwind {
+; RV32IF-LABEL: test_floor_ui64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call floorf@plt
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fmv.w.x ft0, zero
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixunssfdi@plt
+; RV32IF-NEXT:    mv a3, a0
+; RV32IF-NEXT:    bnez s0, .LBB3_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a3, 0
+; RV32IF-NEXT:  .LBB3_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI3_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI3_0)(a0)
+; RV32IF-NEXT:    flt.s a4, ft0, fs0
+; RV32IF-NEXT:    li a2, -1
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a4, .LBB3_7
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    beqz s0, .LBB3_8
+; RV32IF-NEXT:  .LBB3_4:
+; RV32IF-NEXT:    bnez a4, .LBB3_6
+; RV32IF-NEXT:  .LBB3_5:
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:  .LBB3_6:
+; RV32IF-NEXT:    mv a1, a2
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB3_7:
+; RV32IF-NEXT:    mv a0, a3
+; RV32IF-NEXT:    bnez s0, .LBB3_4
+; RV32IF-NEXT:  .LBB3_8:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    beqz a4, .LBB3_5
+; RV32IF-NEXT:    j .LBB3_6
+;
+; RV64IF-LABEL: test_floor_ui64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB3_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB3_2:
+; RV64IF-NEXT:    fcvt.lu.s a0, fa0, rdn
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.floor.f32(float %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_ceil_si32(float %x) {
+; RV32IF-LABEL: test_ceil_si32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB4_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB4_2:
+; RV32IF-NEXT:    fcvt.w.s a0, fa0, rup
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_ceil_si32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB4_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB4_2:
+; RV64IF-NEXT:    fcvt.w.s a0, fa0, rup
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.ceil.f32(float %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_ceil_si64(float %x) nounwind {
+; RV32IF-LABEL: test_ceil_si64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call ceilf@plt
+; RV32IF-NEXT:    lui a0, %hi(.LCPI5_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI5_0)(a0)
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixsfdi@plt
+; RV32IF-NEXT:    mv a2, a0
+; RV32IF-NEXT:    bnez s0, .LBB5_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a2, 0
+; RV32IF-NEXT:  .LBB5_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI5_1)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI5_1)(a0)
+; RV32IF-NEXT:    flt.s a3, ft0, fs0
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a3, .LBB5_9
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    beqz a2, .LBB5_10
+; RV32IF-NEXT:  .LBB5_4:
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    beqz s0, .LBB5_11
+; RV32IF-NEXT:  .LBB5_5:
+; RV32IF-NEXT:    bnez a3, .LBB5_12
+; RV32IF-NEXT:  .LBB5_6:
+; RV32IF-NEXT:    bnez a2, .LBB5_8
+; RV32IF-NEXT:  .LBB5_7:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:  .LBB5_8:
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB5_9:
+; RV32IF-NEXT:    mv a0, a2
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    bnez a2, .LBB5_4
+; RV32IF-NEXT:  .LBB5_10:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    bnez s0, .LBB5_5
+; RV32IF-NEXT:  .LBB5_11:
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    beqz a3, .LBB5_6
+; RV32IF-NEXT:  .LBB5_12:
+; RV32IF-NEXT:    addi a1, a4, -1
+; RV32IF-NEXT:    beqz a2, .LBB5_7
+; RV32IF-NEXT:    j .LBB5_8
+;
+; RV64IF-LABEL: test_ceil_si64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB5_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB5_2:
+; RV64IF-NEXT:    fcvt.l.s a0, fa0, rup
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.ceil.f32(float %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_ceil_ui32(float %x) {
+; RV32IF-LABEL: test_ceil_ui32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB6_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB6_2:
+; RV32IF-NEXT:    fcvt.wu.s a0, fa0, rup
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_ceil_ui32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB6_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB6_2:
+; RV64IF-NEXT:    fcvt.wu.s a0, fa0, rup
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.ceil.f32(float %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_ceil_ui64(float %x) nounwind {
+; RV32IF-LABEL: test_ceil_ui64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call ceilf@plt
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fmv.w.x ft0, zero
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixunssfdi@plt
+; RV32IF-NEXT:    mv a3, a0
+; RV32IF-NEXT:    bnez s0, .LBB7_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a3, 0
+; RV32IF-NEXT:  .LBB7_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI7_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI7_0)(a0)
+; RV32IF-NEXT:    flt.s a4, ft0, fs0
+; RV32IF-NEXT:    li a2, -1
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a4, .LBB7_7
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    beqz s0, .LBB7_8
+; RV32IF-NEXT:  .LBB7_4:
+; RV32IF-NEXT:    bnez a4, .LBB7_6
+; RV32IF-NEXT:  .LBB7_5:
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:  .LBB7_6:
+; RV32IF-NEXT:    mv a1, a2
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB7_7:
+; RV32IF-NEXT:    mv a0, a3
+; RV32IF-NEXT:    bnez s0, .LBB7_4
+; RV32IF-NEXT:  .LBB7_8:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    beqz a4, .LBB7_5
+; RV32IF-NEXT:    j .LBB7_6
+;
+; RV64IF-LABEL: test_ceil_ui64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB7_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB7_2:
+; RV64IF-NEXT:    fcvt.lu.s a0, fa0, rup
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.ceil.f32(float %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_trunc_si32(float %x) {
+; RV32IF-LABEL: test_trunc_si32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB8_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB8_2:
+; RV32IF-NEXT:    fcvt.w.s a0, fa0, rtz
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_trunc_si32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB8_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB8_2:
+; RV64IF-NEXT:    fcvt.w.s a0, fa0, rtz
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.trunc.f32(float %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_trunc_si64(float %x) nounwind {
+; RV32IF-LABEL: test_trunc_si64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call truncf@plt
+; RV32IF-NEXT:    lui a0, %hi(.LCPI9_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI9_0)(a0)
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixsfdi@plt
+; RV32IF-NEXT:    mv a2, a0
+; RV32IF-NEXT:    bnez s0, .LBB9_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a2, 0
+; RV32IF-NEXT:  .LBB9_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI9_1)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI9_1)(a0)
+; RV32IF-NEXT:    flt.s a3, ft0, fs0
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a3, .LBB9_9
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    beqz a2, .LBB9_10
+; RV32IF-NEXT:  .LBB9_4:
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    beqz s0, .LBB9_11
+; RV32IF-NEXT:  .LBB9_5:
+; RV32IF-NEXT:    bnez a3, .LBB9_12
+; RV32IF-NEXT:  .LBB9_6:
+; RV32IF-NEXT:    bnez a2, .LBB9_8
+; RV32IF-NEXT:  .LBB9_7:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:  .LBB9_8:
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB9_9:
+; RV32IF-NEXT:    mv a0, a2
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    bnez a2, .LBB9_4
+; RV32IF-NEXT:  .LBB9_10:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    bnez s0, .LBB9_5
+; RV32IF-NEXT:  .LBB9_11:
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    beqz a3, .LBB9_6
+; RV32IF-NEXT:  .LBB9_12:
+; RV32IF-NEXT:    addi a1, a4, -1
+; RV32IF-NEXT:    beqz a2, .LBB9_7
+; RV32IF-NEXT:    j .LBB9_8
+;
+; RV64IF-LABEL: test_trunc_si64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB9_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB9_2:
+; RV64IF-NEXT:    fcvt.l.s a0, fa0, rtz
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.trunc.f32(float %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_trunc_ui32(float %x) {
+; RV32IF-LABEL: test_trunc_ui32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB10_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB10_2:
+; RV32IF-NEXT:    fcvt.wu.s a0, fa0, rtz
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_trunc_ui32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB10_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB10_2:
+; RV64IF-NEXT:    fcvt.wu.s a0, fa0, rtz
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.trunc.f32(float %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_trunc_ui64(float %x) nounwind {
+; RV32IF-LABEL: test_trunc_ui64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call truncf@plt
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fmv.w.x ft0, zero
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixunssfdi@plt
+; RV32IF-NEXT:    mv a3, a0
+; RV32IF-NEXT:    bnez s0, .LBB11_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a3, 0
+; RV32IF-NEXT:  .LBB11_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI11_0)(a0)
+; RV32IF-NEXT:    flt.s a4, ft0, fs0
+; RV32IF-NEXT:    li a2, -1
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a4, .LBB11_7
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    beqz s0, .LBB11_8
+; RV32IF-NEXT:  .LBB11_4:
+; RV32IF-NEXT:    bnez a4, .LBB11_6
+; RV32IF-NEXT:  .LBB11_5:
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:  .LBB11_6:
+; RV32IF-NEXT:    mv a1, a2
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB11_7:
+; RV32IF-NEXT:    mv a0, a3
+; RV32IF-NEXT:    bnez s0, .LBB11_4
+; RV32IF-NEXT:  .LBB11_8:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    beqz a4, .LBB11_5
+; RV32IF-NEXT:    j .LBB11_6
+;
+; RV64IF-LABEL: test_trunc_ui64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB11_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB11_2:
+; RV64IF-NEXT:    fcvt.lu.s a0, fa0, rtz
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.trunc.f32(float %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_round_si32(float %x) {
+; RV32IF-LABEL: test_round_si32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB12_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB12_2:
+; RV32IF-NEXT:    fcvt.w.s a0, fa0, rmm
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_round_si32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB12_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB12_2:
+; RV64IF-NEXT:    fcvt.w.s a0, fa0, rmm
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.round.f32(float %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_round_si64(float %x) nounwind {
+; RV32IF-LABEL: test_round_si64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call roundf@plt
+; RV32IF-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI13_0)(a0)
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixsfdi@plt
+; RV32IF-NEXT:    mv a2, a0
+; RV32IF-NEXT:    bnez s0, .LBB13_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a2, 0
+; RV32IF-NEXT:  .LBB13_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI13_1)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI13_1)(a0)
+; RV32IF-NEXT:    flt.s a3, ft0, fs0
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a3, .LBB13_9
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    beqz a2, .LBB13_10
+; RV32IF-NEXT:  .LBB13_4:
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    beqz s0, .LBB13_11
+; RV32IF-NEXT:  .LBB13_5:
+; RV32IF-NEXT:    bnez a3, .LBB13_12
+; RV32IF-NEXT:  .LBB13_6:
+; RV32IF-NEXT:    bnez a2, .LBB13_8
+; RV32IF-NEXT:  .LBB13_7:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:  .LBB13_8:
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB13_9:
+; RV32IF-NEXT:    mv a0, a2
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    bnez a2, .LBB13_4
+; RV32IF-NEXT:  .LBB13_10:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    bnez s0, .LBB13_5
+; RV32IF-NEXT:  .LBB13_11:
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    beqz a3, .LBB13_6
+; RV32IF-NEXT:  .LBB13_12:
+; RV32IF-NEXT:    addi a1, a4, -1
+; RV32IF-NEXT:    beqz a2, .LBB13_7
+; RV32IF-NEXT:    j .LBB13_8
+;
+; RV64IF-LABEL: test_round_si64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB13_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB13_2:
+; RV64IF-NEXT:    fcvt.l.s a0, fa0, rmm
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.round.f32(float %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_round_ui32(float %x) {
+; RV32IF-LABEL: test_round_ui32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB14_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB14_2:
+; RV32IF-NEXT:    fcvt.wu.s a0, fa0, rmm
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_round_ui32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB14_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB14_2:
+; RV64IF-NEXT:    fcvt.wu.s a0, fa0, rmm
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.round.f32(float %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_round_ui64(float %x) nounwind {
+; RV32IF-LABEL: test_round_ui64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call roundf@plt
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fmv.w.x ft0, zero
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixunssfdi@plt
+; RV32IF-NEXT:    mv a3, a0
+; RV32IF-NEXT:    bnez s0, .LBB15_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a3, 0
+; RV32IF-NEXT:  .LBB15_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI15_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI15_0)(a0)
+; RV32IF-NEXT:    flt.s a4, ft0, fs0
+; RV32IF-NEXT:    li a2, -1
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a4, .LBB15_7
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    beqz s0, .LBB15_8
+; RV32IF-NEXT:  .LBB15_4:
+; RV32IF-NEXT:    bnez a4, .LBB15_6
+; RV32IF-NEXT:  .LBB15_5:
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:  .LBB15_6:
+; RV32IF-NEXT:    mv a1, a2
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB15_7:
+; RV32IF-NEXT:    mv a0, a3
+; RV32IF-NEXT:    bnez s0, .LBB15_4
+; RV32IF-NEXT:  .LBB15_8:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    beqz a4, .LBB15_5
+; RV32IF-NEXT:    j .LBB15_6
+;
+; RV64IF-LABEL: test_round_ui64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB15_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB15_2:
+; RV64IF-NEXT:    fcvt.lu.s a0, fa0, rmm
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.round.f32(float %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_roundeven_si32(float %x) {
+; RV32IF-LABEL: test_roundeven_si32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB16_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB16_2:
+; RV32IF-NEXT:    fcvt.w.s a0, fa0, rne
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_roundeven_si32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB16_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB16_2:
+; RV64IF-NEXT:    fcvt.w.s a0, fa0, rne
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.roundeven.f32(float %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_roundeven_si64(float %x) nounwind {
+; RV32IF-LABEL: test_roundeven_si64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call roundevenf@plt
+; RV32IF-NEXT:    lui a0, %hi(.LCPI17_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI17_0)(a0)
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixsfdi@plt
+; RV32IF-NEXT:    mv a2, a0
+; RV32IF-NEXT:    bnez s0, .LBB17_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a2, 0
+; RV32IF-NEXT:  .LBB17_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI17_1)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI17_1)(a0)
+; RV32IF-NEXT:    flt.s a3, ft0, fs0
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a3, .LBB17_9
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    beqz a2, .LBB17_10
+; RV32IF-NEXT:  .LBB17_4:
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    beqz s0, .LBB17_11
+; RV32IF-NEXT:  .LBB17_5:
+; RV32IF-NEXT:    bnez a3, .LBB17_12
+; RV32IF-NEXT:  .LBB17_6:
+; RV32IF-NEXT:    bnez a2, .LBB17_8
+; RV32IF-NEXT:  .LBB17_7:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:  .LBB17_8:
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB17_9:
+; RV32IF-NEXT:    mv a0, a2
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    bnez a2, .LBB17_4
+; RV32IF-NEXT:  .LBB17_10:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    bnez s0, .LBB17_5
+; RV32IF-NEXT:  .LBB17_11:
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    beqz a3, .LBB17_6
+; RV32IF-NEXT:  .LBB17_12:
+; RV32IF-NEXT:    addi a1, a4, -1
+; RV32IF-NEXT:    beqz a2, .LBB17_7
+; RV32IF-NEXT:    j .LBB17_8
+;
+; RV64IF-LABEL: test_roundeven_si64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB17_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB17_2:
+; RV64IF-NEXT:    fcvt.l.s a0, fa0, rne
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.roundeven.f32(float %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_roundeven_ui32(float %x) {
+; RV32IF-LABEL: test_roundeven_ui32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB18_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB18_2:
+; RV32IF-NEXT:    fcvt.wu.s a0, fa0, rne
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_roundeven_ui32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB18_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB18_2:
+; RV64IF-NEXT:    fcvt.wu.s a0, fa0, rne
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.roundeven.f32(float %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_roundeven_ui64(float %x) nounwind {
+; RV32IF-LABEL: test_roundeven_ui64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call roundevenf@plt
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fmv.w.x ft0, zero
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixunssfdi@plt
+; RV32IF-NEXT:    mv a3, a0
+; RV32IF-NEXT:    bnez s0, .LBB19_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a3, 0
+; RV32IF-NEXT:  .LBB19_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI19_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI19_0)(a0)
+; RV32IF-NEXT:    flt.s a4, ft0, fs0
+; RV32IF-NEXT:    li a2, -1
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a4, .LBB19_7
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    beqz s0, .LBB19_8
+; RV32IF-NEXT:  .LBB19_4:
+; RV32IF-NEXT:    bnez a4, .LBB19_6
+; RV32IF-NEXT:  .LBB19_5:
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:  .LBB19_6:
+; RV32IF-NEXT:    mv a1, a2
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB19_7:
+; RV32IF-NEXT:    mv a0, a3
+; RV32IF-NEXT:    bnez s0, .LBB19_4
+; RV32IF-NEXT:  .LBB19_8:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    beqz a4, .LBB19_5
+; RV32IF-NEXT:    j .LBB19_6
+;
+; RV64IF-LABEL: test_roundeven_ui64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB19_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB19_2:
+; RV64IF-NEXT:    fcvt.lu.s a0, fa0, rne
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.roundeven.f32(float %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+declare float @llvm.floor.f32(float)
+declare float @llvm.ceil.f32(float)
+declare float @llvm.trunc.f32(float)
+declare float @llvm.round.f32(float)
+declare float @llvm.roundeven.f32(float)
+declare i32 @llvm.fptosi.sat.i32.f32(float)
+declare i64 @llvm.fptosi.sat.i64.f32(float)
+declare i32 @llvm.fptoui.sat.i32.f32(float)
+declare i64 @llvm.fptoui.sat.i64.f32(float)

diff --git a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
new file mode 100644
index 0000000000000..7b3104c69bef6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
@@ -0,0 +1,970 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+zfh -verify-machineinstrs < %s \
+; RUN:   -target-abi=ilp32f | FileCheck -check-prefix=RV32IZFH %s
+; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs < %s \
+; RUN:   -target-abi=lp64f | FileCheck -check-prefix=RV64IZFH %s
+
+define signext i32 @test_floor_si32(half %x) {
+; RV32IZFH-LABEL: test_floor_si32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB0_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB0_2:
+; RV32IZFH-NEXT:    fcvt.w.h a0, fa0, rdn
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_floor_si32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB0_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB0_2:
+; RV64IZFH-NEXT:    fcvt.w.h a0, fa0, rdn
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.floor.f16(half %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_floor_si64(half %x) nounwind {
+; RV32IZFH-LABEL: test_floor_si64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call floorf@plt
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI1_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI1_0)(a0)
+; RV32IZFH-NEXT:    fcvt.h.s ft1, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft1
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixsfdi@plt
+; RV32IZFH-NEXT:    mv a2, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB1_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a2, 0
+; RV32IZFH-NEXT:  .LBB1_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI1_1)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI1_1)(a0)
+; RV32IZFH-NEXT:    flt.s a3, ft0, fs0
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a3, .LBB1_9
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    beqz a2, .LBB1_10
+; RV32IZFH-NEXT:  .LBB1_4:
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB1_11
+; RV32IZFH-NEXT:  .LBB1_5:
+; RV32IZFH-NEXT:    bnez a3, .LBB1_12
+; RV32IZFH-NEXT:  .LBB1_6:
+; RV32IZFH-NEXT:    bnez a2, .LBB1_8
+; RV32IZFH-NEXT:  .LBB1_7:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:  .LBB1_8:
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB1_9:
+; RV32IZFH-NEXT:    mv a0, a2
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    bnez a2, .LBB1_4
+; RV32IZFH-NEXT:  .LBB1_10:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    bnez s0, .LBB1_5
+; RV32IZFH-NEXT:  .LBB1_11:
+; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    beqz a3, .LBB1_6
+; RV32IZFH-NEXT:  .LBB1_12:
+; RV32IZFH-NEXT:    addi a1, a4, -1
+; RV32IZFH-NEXT:    beqz a2, .LBB1_7
+; RV32IZFH-NEXT:    j .LBB1_8
+;
+; RV64IZFH-LABEL: test_floor_si64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB1_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB1_2:
+; RV64IZFH-NEXT:    fcvt.l.h a0, fa0, rdn
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.floor.f16(half %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_floor_ui32(half %x) {
+; RV32IZFH-LABEL: test_floor_ui32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB2_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB2_2:
+; RV32IZFH-NEXT:    fcvt.wu.h a0, fa0, rdn
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_floor_ui32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB2_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB2_2:
+; RV64IZFH-NEXT:    fcvt.wu.h a0, fa0, rdn
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.floor.f16(half %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_floor_ui64(half %x) nounwind {
+; RV32IZFH-LABEL: test_floor_ui64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call floorf@plt
+; RV32IZFH-NEXT:    fcvt.h.s ft0, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft0
+; RV32IZFH-NEXT:    fmv.w.x ft0, zero
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixunssfdi@plt
+; RV32IZFH-NEXT:    mv a3, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB3_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a3, 0
+; RV32IZFH-NEXT:  .LBB3_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI3_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI3_0)(a0)
+; RV32IZFH-NEXT:    flt.s a4, ft0, fs0
+; RV32IZFH-NEXT:    li a2, -1
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a4, .LBB3_7
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    beqz s0, .LBB3_8
+; RV32IZFH-NEXT:  .LBB3_4:
+; RV32IZFH-NEXT:    bnez a4, .LBB3_6
+; RV32IZFH-NEXT:  .LBB3_5:
+; RV32IZFH-NEXT:    mv a2, a1
+; RV32IZFH-NEXT:  .LBB3_6:
+; RV32IZFH-NEXT:    mv a1, a2
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB3_7:
+; RV32IZFH-NEXT:    mv a0, a3
+; RV32IZFH-NEXT:    bnez s0, .LBB3_4
+; RV32IZFH-NEXT:  .LBB3_8:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:    beqz a4, .LBB3_5
+; RV32IZFH-NEXT:    j .LBB3_6
+;
+; RV64IZFH-LABEL: test_floor_ui64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB3_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB3_2:
+; RV64IZFH-NEXT:    fcvt.lu.h a0, fa0, rdn
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.floor.f16(half %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_ceil_si32(half %x) {
+; RV32IZFH-LABEL: test_ceil_si32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB4_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB4_2:
+; RV32IZFH-NEXT:    fcvt.w.h a0, fa0, rup
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_ceil_si32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB4_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB4_2:
+; RV64IZFH-NEXT:    fcvt.w.h a0, fa0, rup
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.ceil.f16(half %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_ceil_si64(half %x) nounwind {
+; RV32IZFH-LABEL: test_ceil_si64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call ceilf@plt
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI5_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI5_0)(a0)
+; RV32IZFH-NEXT:    fcvt.h.s ft1, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft1
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixsfdi@plt
+; RV32IZFH-NEXT:    mv a2, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB5_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a2, 0
+; RV32IZFH-NEXT:  .LBB5_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI5_1)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI5_1)(a0)
+; RV32IZFH-NEXT:    flt.s a3, ft0, fs0
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a3, .LBB5_9
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    beqz a2, .LBB5_10
+; RV32IZFH-NEXT:  .LBB5_4:
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB5_11
+; RV32IZFH-NEXT:  .LBB5_5:
+; RV32IZFH-NEXT:    bnez a3, .LBB5_12
+; RV32IZFH-NEXT:  .LBB5_6:
+; RV32IZFH-NEXT:    bnez a2, .LBB5_8
+; RV32IZFH-NEXT:  .LBB5_7:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:  .LBB5_8:
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB5_9:
+; RV32IZFH-NEXT:    mv a0, a2
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    bnez a2, .LBB5_4
+; RV32IZFH-NEXT:  .LBB5_10:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    bnez s0, .LBB5_5
+; RV32IZFH-NEXT:  .LBB5_11:
+; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    beqz a3, .LBB5_6
+; RV32IZFH-NEXT:  .LBB5_12:
+; RV32IZFH-NEXT:    addi a1, a4, -1
+; RV32IZFH-NEXT:    beqz a2, .LBB5_7
+; RV32IZFH-NEXT:    j .LBB5_8
+;
+; RV64IZFH-LABEL: test_ceil_si64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB5_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB5_2:
+; RV64IZFH-NEXT:    fcvt.l.h a0, fa0, rup
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.ceil.f16(half %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_ceil_ui32(half %x) {
+; RV32IZFH-LABEL: test_ceil_ui32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB6_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB6_2:
+; RV32IZFH-NEXT:    fcvt.wu.h a0, fa0, rup
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_ceil_ui32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB6_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB6_2:
+; RV64IZFH-NEXT:    fcvt.wu.h a0, fa0, rup
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.ceil.f16(half %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_ceil_ui64(half %x) nounwind {
+; RV32IZFH-LABEL: test_ceil_ui64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call ceilf@plt
+; RV32IZFH-NEXT:    fcvt.h.s ft0, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft0
+; RV32IZFH-NEXT:    fmv.w.x ft0, zero
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixunssfdi@plt
+; RV32IZFH-NEXT:    mv a3, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB7_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a3, 0
+; RV32IZFH-NEXT:  .LBB7_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI7_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI7_0)(a0)
+; RV32IZFH-NEXT:    flt.s a4, ft0, fs0
+; RV32IZFH-NEXT:    li a2, -1
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a4, .LBB7_7
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    beqz s0, .LBB7_8
+; RV32IZFH-NEXT:  .LBB7_4:
+; RV32IZFH-NEXT:    bnez a4, .LBB7_6
+; RV32IZFH-NEXT:  .LBB7_5:
+; RV32IZFH-NEXT:    mv a2, a1
+; RV32IZFH-NEXT:  .LBB7_6:
+; RV32IZFH-NEXT:    mv a1, a2
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB7_7:
+; RV32IZFH-NEXT:    mv a0, a3
+; RV32IZFH-NEXT:    bnez s0, .LBB7_4
+; RV32IZFH-NEXT:  .LBB7_8:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:    beqz a4, .LBB7_5
+; RV32IZFH-NEXT:    j .LBB7_6
+;
+; RV64IZFH-LABEL: test_ceil_ui64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB7_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB7_2:
+; RV64IZFH-NEXT:    fcvt.lu.h a0, fa0, rup
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.ceil.f16(half %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_trunc_si32(half %x) {
+; RV32IZFH-LABEL: test_trunc_si32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB8_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB8_2:
+; RV32IZFH-NEXT:    fcvt.w.h a0, fa0, rtz
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_trunc_si32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB8_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB8_2:
+; RV64IZFH-NEXT:    fcvt.w.h a0, fa0, rtz
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.trunc.f16(half %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_trunc_si64(half %x) nounwind {
+; RV32IZFH-LABEL: test_trunc_si64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call truncf@plt
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI9_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI9_0)(a0)
+; RV32IZFH-NEXT:    fcvt.h.s ft1, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft1
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixsfdi@plt
+; RV32IZFH-NEXT:    mv a2, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB9_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a2, 0
+; RV32IZFH-NEXT:  .LBB9_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI9_1)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI9_1)(a0)
+; RV32IZFH-NEXT:    flt.s a3, ft0, fs0
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a3, .LBB9_9
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    beqz a2, .LBB9_10
+; RV32IZFH-NEXT:  .LBB9_4:
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB9_11
+; RV32IZFH-NEXT:  .LBB9_5:
+; RV32IZFH-NEXT:    bnez a3, .LBB9_12
+; RV32IZFH-NEXT:  .LBB9_6:
+; RV32IZFH-NEXT:    bnez a2, .LBB9_8
+; RV32IZFH-NEXT:  .LBB9_7:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:  .LBB9_8:
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB9_9:
+; RV32IZFH-NEXT:    mv a0, a2
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    bnez a2, .LBB9_4
+; RV32IZFH-NEXT:  .LBB9_10:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    bnez s0, .LBB9_5
+; RV32IZFH-NEXT:  .LBB9_11:
+; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    beqz a3, .LBB9_6
+; RV32IZFH-NEXT:  .LBB9_12:
+; RV32IZFH-NEXT:    addi a1, a4, -1
+; RV32IZFH-NEXT:    beqz a2, .LBB9_7
+; RV32IZFH-NEXT:    j .LBB9_8
+;
+; RV64IZFH-LABEL: test_trunc_si64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB9_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB9_2:
+; RV64IZFH-NEXT:    fcvt.l.h a0, fa0, rtz
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.trunc.f16(half %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_trunc_ui32(half %x) {
+; RV32IZFH-LABEL: test_trunc_ui32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB10_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB10_2:
+; RV32IZFH-NEXT:    fcvt.wu.h a0, fa0, rtz
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_trunc_ui32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB10_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB10_2:
+; RV64IZFH-NEXT:    fcvt.wu.h a0, fa0, rtz
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.trunc.f16(half %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_trunc_ui64(half %x) nounwind {
+; RV32IZFH-LABEL: test_trunc_ui64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call truncf@plt
+; RV32IZFH-NEXT:    fcvt.h.s ft0, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft0
+; RV32IZFH-NEXT:    fmv.w.x ft0, zero
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixunssfdi@plt
+; RV32IZFH-NEXT:    mv a3, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB11_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a3, 0
+; RV32IZFH-NEXT:  .LBB11_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI11_0)(a0)
+; RV32IZFH-NEXT:    flt.s a4, ft0, fs0
+; RV32IZFH-NEXT:    li a2, -1
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a4, .LBB11_7
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    beqz s0, .LBB11_8
+; RV32IZFH-NEXT:  .LBB11_4:
+; RV32IZFH-NEXT:    bnez a4, .LBB11_6
+; RV32IZFH-NEXT:  .LBB11_5:
+; RV32IZFH-NEXT:    mv a2, a1
+; RV32IZFH-NEXT:  .LBB11_6:
+; RV32IZFH-NEXT:    mv a1, a2
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB11_7:
+; RV32IZFH-NEXT:    mv a0, a3
+; RV32IZFH-NEXT:    bnez s0, .LBB11_4
+; RV32IZFH-NEXT:  .LBB11_8:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:    beqz a4, .LBB11_5
+; RV32IZFH-NEXT:    j .LBB11_6
+;
+; RV64IZFH-LABEL: test_trunc_ui64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB11_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB11_2:
+; RV64IZFH-NEXT:    fcvt.lu.h a0, fa0, rtz
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.trunc.f16(half %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_round_si32(half %x) {
+; RV32IZFH-LABEL: test_round_si32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB12_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB12_2:
+; RV32IZFH-NEXT:    fcvt.w.h a0, fa0, rmm
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_round_si32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB12_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB12_2:
+; RV64IZFH-NEXT:    fcvt.w.h a0, fa0, rmm
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.round.f16(half %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_round_si64(half %x) nounwind {
+; RV32IZFH-LABEL: test_round_si64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call roundf@plt
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI13_0)(a0)
+; RV32IZFH-NEXT:    fcvt.h.s ft1, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft1
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixsfdi@plt
+; RV32IZFH-NEXT:    mv a2, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB13_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a2, 0
+; RV32IZFH-NEXT:  .LBB13_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI13_1)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI13_1)(a0)
+; RV32IZFH-NEXT:    flt.s a3, ft0, fs0
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a3, .LBB13_9
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    beqz a2, .LBB13_10
+; RV32IZFH-NEXT:  .LBB13_4:
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB13_11
+; RV32IZFH-NEXT:  .LBB13_5:
+; RV32IZFH-NEXT:    bnez a3, .LBB13_12
+; RV32IZFH-NEXT:  .LBB13_6:
+; RV32IZFH-NEXT:    bnez a2, .LBB13_8
+; RV32IZFH-NEXT:  .LBB13_7:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:  .LBB13_8:
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB13_9:
+; RV32IZFH-NEXT:    mv a0, a2
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    bnez a2, .LBB13_4
+; RV32IZFH-NEXT:  .LBB13_10:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    bnez s0, .LBB13_5
+; RV32IZFH-NEXT:  .LBB13_11:
+; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    beqz a3, .LBB13_6
+; RV32IZFH-NEXT:  .LBB13_12:
+; RV32IZFH-NEXT:    addi a1, a4, -1
+; RV32IZFH-NEXT:    beqz a2, .LBB13_7
+; RV32IZFH-NEXT:    j .LBB13_8
+;
+; RV64IZFH-LABEL: test_round_si64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB13_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB13_2:
+; RV64IZFH-NEXT:    fcvt.l.h a0, fa0, rmm
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.round.f16(half %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_round_ui32(half %x) {
+; RV32IZFH-LABEL: test_round_ui32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB14_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB14_2:
+; RV32IZFH-NEXT:    fcvt.wu.h a0, fa0, rmm
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_round_ui32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB14_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB14_2:
+; RV64IZFH-NEXT:    fcvt.wu.h a0, fa0, rmm
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.round.f16(half %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_round_ui64(half %x) nounwind {
+; RV32IZFH-LABEL: test_round_ui64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call roundf@plt
+; RV32IZFH-NEXT:    fcvt.h.s ft0, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft0
+; RV32IZFH-NEXT:    fmv.w.x ft0, zero
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixunssfdi@plt
+; RV32IZFH-NEXT:    mv a3, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB15_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a3, 0
+; RV32IZFH-NEXT:  .LBB15_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI15_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI15_0)(a0)
+; RV32IZFH-NEXT:    flt.s a4, ft0, fs0
+; RV32IZFH-NEXT:    li a2, -1
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a4, .LBB15_7
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    beqz s0, .LBB15_8
+; RV32IZFH-NEXT:  .LBB15_4:
+; RV32IZFH-NEXT:    bnez a4, .LBB15_6
+; RV32IZFH-NEXT:  .LBB15_5:
+; RV32IZFH-NEXT:    mv a2, a1
+; RV32IZFH-NEXT:  .LBB15_6:
+; RV32IZFH-NEXT:    mv a1, a2
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB15_7:
+; RV32IZFH-NEXT:    mv a0, a3
+; RV32IZFH-NEXT:    bnez s0, .LBB15_4
+; RV32IZFH-NEXT:  .LBB15_8:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:    beqz a4, .LBB15_5
+; RV32IZFH-NEXT:    j .LBB15_6
+;
+; RV64IZFH-LABEL: test_round_ui64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB15_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB15_2:
+; RV64IZFH-NEXT:    fcvt.lu.h a0, fa0, rmm
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.round.f16(half %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_roundeven_si32(half %x) {
+; RV32IZFH-LABEL: test_roundeven_si32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB16_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB16_2:
+; RV32IZFH-NEXT:    fcvt.w.h a0, fa0, rne
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_roundeven_si32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB16_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB16_2:
+; RV64IZFH-NEXT:    fcvt.w.h a0, fa0, rne
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.roundeven.f16(half %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_roundeven_si64(half %x) nounwind {
+; RV32IZFH-LABEL: test_roundeven_si64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call roundevenf@plt
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI17_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI17_0)(a0)
+; RV32IZFH-NEXT:    fcvt.h.s ft1, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft1
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixsfdi@plt
+; RV32IZFH-NEXT:    mv a2, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB17_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a2, 0
+; RV32IZFH-NEXT:  .LBB17_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI17_1)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI17_1)(a0)
+; RV32IZFH-NEXT:    flt.s a3, ft0, fs0
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a3, .LBB17_9
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    beqz a2, .LBB17_10
+; RV32IZFH-NEXT:  .LBB17_4:
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB17_11
+; RV32IZFH-NEXT:  .LBB17_5:
+; RV32IZFH-NEXT:    bnez a3, .LBB17_12
+; RV32IZFH-NEXT:  .LBB17_6:
+; RV32IZFH-NEXT:    bnez a2, .LBB17_8
+; RV32IZFH-NEXT:  .LBB17_7:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:  .LBB17_8:
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB17_9:
+; RV32IZFH-NEXT:    mv a0, a2
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    bnez a2, .LBB17_4
+; RV32IZFH-NEXT:  .LBB17_10:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    bnez s0, .LBB17_5
+; RV32IZFH-NEXT:  .LBB17_11:
+; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    beqz a3, .LBB17_6
+; RV32IZFH-NEXT:  .LBB17_12:
+; RV32IZFH-NEXT:    addi a1, a4, -1
+; RV32IZFH-NEXT:    beqz a2, .LBB17_7
+; RV32IZFH-NEXT:    j .LBB17_8
+;
+; RV64IZFH-LABEL: test_roundeven_si64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB17_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB17_2:
+; RV64IZFH-NEXT:    fcvt.l.h a0, fa0, rne
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.roundeven.f16(half %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_roundeven_ui32(half %x) {
+; RV32IZFH-LABEL: test_roundeven_ui32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB18_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB18_2:
+; RV32IZFH-NEXT:    fcvt.wu.h a0, fa0, rne
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_roundeven_ui32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB18_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB18_2:
+; RV64IZFH-NEXT:    fcvt.wu.h a0, fa0, rne
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.roundeven.f16(half %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_roundeven_ui64(half %x) nounwind {
+; RV32IZFH-LABEL: test_roundeven_ui64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call roundevenf@plt
+; RV32IZFH-NEXT:    fcvt.h.s ft0, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft0
+; RV32IZFH-NEXT:    fmv.w.x ft0, zero
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixunssfdi@plt
+; RV32IZFH-NEXT:    mv a3, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB19_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a3, 0
+; RV32IZFH-NEXT:  .LBB19_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI19_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI19_0)(a0)
+; RV32IZFH-NEXT:    flt.s a4, ft0, fs0
+; RV32IZFH-NEXT:    li a2, -1
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a4, .LBB19_7
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    beqz s0, .LBB19_8
+; RV32IZFH-NEXT:  .LBB19_4:
+; RV32IZFH-NEXT:    bnez a4, .LBB19_6
+; RV32IZFH-NEXT:  .LBB19_5:
+; RV32IZFH-NEXT:    mv a2, a1
+; RV32IZFH-NEXT:  .LBB19_6:
+; RV32IZFH-NEXT:    mv a1, a2
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB19_7:
+; RV32IZFH-NEXT:    mv a0, a3
+; RV32IZFH-NEXT:    bnez s0, .LBB19_4
+; RV32IZFH-NEXT:  .LBB19_8:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:    beqz a4, .LBB19_5
+; RV32IZFH-NEXT:    j .LBB19_6
+;
+; RV64IZFH-LABEL: test_roundeven_ui64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB19_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB19_2:
+; RV64IZFH-NEXT:    fcvt.lu.h a0, fa0, rne
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.roundeven.f16(half %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+declare half @llvm.floor.f16(half)
+declare half @llvm.ceil.f16(half)
+declare half @llvm.trunc.f16(half)
+declare half @llvm.round.f16(half)
+declare half @llvm.roundeven.f16(half)
+declare i32 @llvm.fptosi.sat.i32.f16(half)
+declare i64 @llvm.fptosi.sat.i64.f16(half)
+declare i32 @llvm.fptoui.sat.i32.f16(half)
+declare i64 @llvm.fptoui.sat.i64.f16(half)
