[llvm] 23d6186 - [SelectionDAG] Fix fptoi.sat scalable vector lowering

David Green via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 21 00:00:27 PDT 2022


Author: David Green
Date: 2022-07-21T08:00:22+01:00
New Revision: 23d6186be0c9c16b7e99a7601c93d1b774610750

URL: https://github.com/llvm/llvm-project/commit/23d6186be0c9c16b7e99a7601c93d1b774610750
DIFF: https://github.com/llvm/llvm-project/commit/23d6186be0c9c16b7e99a7601c93d1b774610750.diff

LOG: [SelectionDAG] Fix fptoi.sat scalable vector lowering

Vector fptosi_sat and fptoui_sat were being expanded by unrolling the
vector operation. This doesn't work for scalable vectors, so this patch
adds a call to TLI.expandFP_TO_INT_SAT if the vector is scalable.
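
As a reference for the semantics the expansion has to preserve, here is a
scalar C++ sketch of the per-lane behaviour of llvm.fptosi.sat for an i32
result (an illustrative model only, not code from this patch; the helper
name is made up):

  #include <cmath>
  #include <cstdint>
  #include <limits>

  // Per-lane model of llvm.fptosi.sat.*.i32: NaN becomes 0, and values
  // outside the destination range clamp to INT32_MIN / INT32_MAX.
  // Hypothetical helper for illustration, not part of LLVM.
  static int32_t fptosi_sat_i32(float F) {
    if (std::isnan(F))
      return 0;
    constexpr int32_t Min = std::numeric_limits<int32_t>::min();
    constexpr int32_t Max = std::numeric_limits<int32_t>::max();
    // (float)Max rounds up to 2^31, so the upper clamp must use >=.
    if (F < static_cast<float>(Min))
      return Min;
    if (F >= static_cast<float>(Max))
      return Max;
    return static_cast<int32_t>(F);
  }

The SVE and RVV output in the tests below performs the equivalent clamping
with predicated compares and selects around the converted value, with NaN
inputs forced to zero.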

Scalable tests are added for AArch64 and RISCV. Some of the AArch64
fptoi_sat operations should be legal, but that will be handled in
another patch.

Differential Revision: https://reviews.llvm.org/D130028

Added: 
    llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
    llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll
    llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll
    llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll

Modified: 
    llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 842ffa2aa23e4..f5a1eae1e7fe4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -737,6 +737,20 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
   case ISD::SELECT:
     Results.push_back(ExpandSELECT(Node));
     return;
+  case ISD::SELECT_CC: {
+    if (Node->getValueType(0).isScalableVector()) {
+      EVT CondVT = TLI.getSetCCResultType(
+          DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
+      SDValue SetCC =
+          DAG.getNode(ISD::SETCC, SDLoc(Node), CondVT, Node->getOperand(0),
+                      Node->getOperand(1), Node->getOperand(4));
+      Results.push_back(DAG.getSelect(SDLoc(Node), Node->getValueType(0), SetCC,
+                                      Node->getOperand(2),
+                                      Node->getOperand(3)));
+      return;
+    }
+    break;
+  }
   case ISD::FP_TO_UINT:
     ExpandFP_TO_UINT(Node, Results);
     return;
@@ -833,6 +847,16 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
       return;
     }
     break;
+  case ISD::FP_TO_SINT_SAT:
+  case ISD::FP_TO_UINT_SAT:
+    // Expand the FP_TO_*INT_SAT if it is scalable to prevent it from unrolling below.
+    if (Node->getValueType(0).isScalableVector()) {
+      if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(Node, DAG)) {
+        Results.push_back(Expanded);
+        return;
+      }
+    }
+    break;
   case ISD::SMULFIX:
   case ISD::UMULFIX:
     if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 6ac7972d8e5ab..d3f99dcca63e4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6111,8 +6111,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     assert(N1.getValueType().isVector() == VT.isVector() &&
            "FP_TO_*INT_SAT type should be vector iff the operand type is "
            "vector!");
-    assert((!VT.isVector() || VT.getVectorNumElements() ==
-                                  N1.getValueType().getVectorNumElements()) &&
+    assert((!VT.isVector() || VT.getVectorElementCount() ==
+                                  N1.getValueType().getVectorElementCount()) &&
            "Vector element counts must match in FP_TO_*INT_SAT");
     assert(!cast<VTSDNode>(N2)->getVT().isVector() &&
            "Type to saturate to must be a scalar.");
@@ -8941,6 +8941,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
            "True and False arms of SelectCC must have same type!");
     assert(Ops[2].getValueType() == VT &&
            "select_cc node must be of same type as true and false value!");
+    assert((!Ops[0].getValueType().isVector() ||
+            Ops[0].getValueType().getVectorElementCount() ==
+                VT.getVectorElementCount()) &&
+           "Expected select_cc with vector result to have the same sized "
+           "comparison type!");
     break;
   case ISD::BR_CC:
     assert(NumOps == 5 && "BR_CC takes 5 operands!");

diff --git a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
new file mode 100644
index 0000000000000..2bba40ddd9d5c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
@@ -0,0 +1,720 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s
+
+; Float
+
+declare <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f32.nxv2i32(<vscale x 2 x float>)
+declare <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f32.nxv4i32(<vscale x 4 x float>)
+declare <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f32.nxv8i32(<vscale x 8 x float>)
+declare <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f32.nxv4i16(<vscale x 4 x float>)
+declare <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f32.nxv8i16(<vscale x 8 x float>)
+declare <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f32.nxv2i64(<vscale x 2 x float>)
+declare <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f32.nxv4i64(<vscale x 4 x float>)
+
+define <vscale x 2 x i32> @test_signed_v2f32_v2i32(<vscale x 2 x float> %f) {
+; CHECK-LABEL: test_signed_v2f32_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-822083584
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z2.d, #0xffffffff80000000
+; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    mov w8, #1325400063
+; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z0.s
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z1.d, p1/m, z2.d
+; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    fcmgt p1.s, p0/z, z0.s, z2.s
+; CHECK-NEXT:    mov z2.d, #0x7fffffff
+; CHECK-NEXT:    mov z1.d, p1/m, z2.d
+; CHECK-NEXT:    fcmuo p0.s, p0/z, z0.s, z0.s
+; CHECK-NEXT:    mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f32.nxv2i32(<vscale x 2 x float> %f)
+    ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_signed_v4f32_v4i32(<vscale x 4 x float> %f) {
+; CHECK-LABEL: test_signed_v4f32_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-822083584
+; CHECK-NEXT:    mov w9, #-2147483648
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    mov w8, #1325400063
+; CHECK-NEXT:    mov z2.s, w9
+; CHECK-NEXT:    mov w9, #2147483647
+; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzs z1.s, p0/m, z0.s
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z3.s, w8
+; CHECK-NEXT:    mov z1.s, p1/m, z2.s
+; CHECK-NEXT:    fcmgt p1.s, p0/z, z0.s, z3.s
+; CHECK-NEXT:    mov z2.s, w9
+; CHECK-NEXT:    fcmuo p0.s, p0/z, z0.s, z0.s
+; CHECK-NEXT:    mov z1.s, p1/m, z2.s
+; CHECK-NEXT:    mov z1.s, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f32.nxv4i32(<vscale x 4 x float> %f)
+    ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_signed_v8f32_v8i32(<vscale x 8 x float> %f) {
+; CHECK-LABEL: test_signed_v8f32_v8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-822083584
+; CHECK-NEXT:    mov w9, #-2147483648
+; CHECK-NEXT:    mov w10, #1325400063
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z5, z0
+; CHECK-NEXT:    fcvtzs z5.s, p0/m, z0.s
+; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    mov w8, #2147483647
+; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, z2.s
+; CHECK-NEXT:    fcmge p2.s, p0/z, z1.s, z2.s
+; CHECK-NEXT:    mov z3.s, w9
+; CHECK-NEXT:    mov z4.s, w10
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    not p2.b, p0/z, p2.b
+; CHECK-NEXT:    mov z5.s, p1/m, z3.s
+; CHECK-NEXT:    fcmgt p1.s, p0/z, z0.s, z4.s
+; CHECK-NEXT:    mov z6.s, w8
+; CHECK-NEXT:    movprfx z2, z1
+; CHECK-NEXT:    fcvtzs z2.s, p0/m, z1.s
+; CHECK-NEXT:    sel z3.s, p2, z3.s, z2.s
+; CHECK-NEXT:    fcmgt p2.s, p0/z, z1.s, z4.s
+; CHECK-NEXT:    sel z2.s, p1, z6.s, z5.s
+; CHECK-NEXT:    mov z3.s, p2/m, z6.s
+; CHECK-NEXT:    fcmuo p1.s, p0/z, z0.s, z0.s
+; CHECK-NEXT:    fcmuo p0.s, p0/z, z1.s, z1.s
+; CHECK-NEXT:    mov z2.s, p1/m, #0 // =0x0
+; CHECK-NEXT:    mov z3.s, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    mov z1.d, z3.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f32.nxv8i32(<vscale x 8 x float> %f)
+    ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f32_v4i16(<vscale x 4 x float> %f) {
+; CHECK-LABEL: test_signed_v4f32_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-956301312
+; CHECK-NEXT:    mov w9, #65024
+; CHECK-NEXT:    movk w9, #18175, lsl #16
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    mov w8, #32767
+; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzs z1.s, p0/m, z0.s
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z2.s, w9
+; CHECK-NEXT:    mov z1.s, p1/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT:    fcmgt p1.s, p0/z, z0.s, z2.s
+; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    fcmuo p0.s, p0/z, z0.s, z0.s
+; CHECK-NEXT:    mov z1.s, p1/m, z2.s
+; CHECK-NEXT:    mov z1.s, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f32.nxv4i16(<vscale x 4 x float> %f)
+    ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) {
+; CHECK-LABEL: test_signed_v8f32_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-956301312
+; CHECK-NEXT:    mov w9, #65024
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movk w9, #18175, lsl #16
+; CHECK-NEXT:    movprfx z4, z1
+; CHECK-NEXT:    fcvtzs z4.s, p0/m, z1.s
+; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    mov w8, #32767
+; CHECK-NEXT:    fcmge p1.s, p0/z, z1.s, z2.s
+; CHECK-NEXT:    mov z3.s, w9
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    fcmgt p2.s, p0/z, z1.s, z3.s
+; CHECK-NEXT:    mov z4.s, p1/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, z2.s
+; CHECK-NEXT:    movprfx z2, z0
+; CHECK-NEXT:    fcvtzs z2.s, p0/m, z0.s
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z5.s, w8
+; CHECK-NEXT:    mov z2.s, p1/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT:    fcmgt p1.s, p0/z, z0.s, z3.s
+; CHECK-NEXT:    sel z3.s, p2, z5.s, z4.s
+; CHECK-NEXT:    mov z2.s, p1/m, z5.s
+; CHECK-NEXT:    fcmuo p1.s, p0/z, z1.s, z1.s
+; CHECK-NEXT:    fcmuo p0.s, p0/z, z0.s, z0.s
+; CHECK-NEXT:    mov z3.s, p1/m, #0 // =0x0
+; CHECK-NEXT:    mov z2.s, p0/m, #0 // =0x0
+; CHECK-NEXT:    uzp1 z0.h, z2.h, z3.h
+; CHECK-NEXT:    ret
+    %x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f32.nxv8i16(<vscale x 8 x float> %f)
+    ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f32_v2i64(<vscale x 2 x float> %f) {
+; CHECK-LABEL: test_signed_v2f32_v2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-553648128
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z2.d, #0x8000000000000000
+; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    mov w8, #1593835519
+; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z0.s
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z1.d, p1/m, z2.d
+; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    fcmgt p1.s, p0/z, z0.s, z2.s
+; CHECK-NEXT:    mov z2.d, #0x7fffffffffffffff
+; CHECK-NEXT:    mov z1.d, p1/m, z2.d
+; CHECK-NEXT:    fcmuo p0.s, p0/z, z0.s, z0.s
+; CHECK-NEXT:    mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f32.nxv2i64(<vscale x 2 x float> %f)
+    ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f32_v4i64(<vscale x 4 x float> %f) {
+; CHECK-LABEL: test_signed_v4f32_v4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-553648128
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uunpklo z3.d, z0.s
+; CHECK-NEXT:    mov w9, #1593835519
+; CHECK-NEXT:    mov z2.d, #0x8000000000000000
+; CHECK-NEXT:    uunpkhi z5.d, z0.s
+; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    movprfx z0, z3
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z3.s
+; CHECK-NEXT:    fcmge p1.s, p0/z, z3.s, z1.s
+; CHECK-NEXT:    mov z4.s, w9
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    fcmgt p2.s, p0/z, z3.s, z4.s
+; CHECK-NEXT:    mov z0.d, p1/m, z2.d
+; CHECK-NEXT:    fcmge p1.s, p0/z, z5.s, z1.s
+; CHECK-NEXT:    movprfx z1, z5
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z5.s
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z6.d, #0x7fffffffffffffff
+; CHECK-NEXT:    mov z1.d, p1/m, z2.d
+; CHECK-NEXT:    fcmgt p1.s, p0/z, z5.s, z4.s
+; CHECK-NEXT:    mov z0.d, p2/m, z6.d
+; CHECK-NEXT:    mov z1.d, p1/m, z6.d
+; CHECK-NEXT:    fcmuo p1.s, p0/z, z3.s, z3.s
+; CHECK-NEXT:    fcmuo p0.s, p0/z, z5.s, z5.s
+; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f32.nxv4i64(<vscale x 4 x float> %f)
+    ret <vscale x 4 x i64> %x
+}
+
+; Double
+
+declare <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f64.nxv2i32(<vscale x 2 x double>)
+declare <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f64.nxv4i32(<vscale x 4 x double>)
+declare <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f64.nxv8i32(<vscale x 8 x double>)
+declare <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f64.nxv4i16(<vscale x 4 x double>)
+declare <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f64.nxv8i16(<vscale x 8 x double>)
+declare <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f64.nxv2i64(<vscale x 2 x double>)
+declare <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f64.nxv4i64(<vscale x 4 x double>)
+
+define <vscale x 2 x i32> @test_signed_v2f64_v2i32(<vscale x 2 x double> %f) {
+; CHECK-LABEL: test_signed_v2f64_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-4476578029606273024
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z2.d, #0xffffffff80000000
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mov x8, #281474972516352
+; CHECK-NEXT:    movk x8, #16863, lsl #48
+; CHECK-NEXT:    fcmge p1.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z0.d
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z1.d, p1/m, z2.d
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    fcmgt p1.d, p0/z, z0.d, z2.d
+; CHECK-NEXT:    mov z2.d, #0x7fffffff
+; CHECK-NEXT:    mov z1.d, p1/m, z2.d
+; CHECK-NEXT:    fcmuo p0.d, p0/z, z0.d, z0.d
+; CHECK-NEXT:    mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f64.nxv2i32(<vscale x 2 x double> %f)
+    ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
+; CHECK-LABEL: test_signed_v4f64_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-4476578029606273024
+; CHECK-NEXT:    mov x9, #281474972516352
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movk x9, #16863, lsl #48
+; CHECK-NEXT:    mov z3.d, #0xffffffff80000000
+; CHECK-NEXT:    movprfx z4, z1
+; CHECK-NEXT:    fcvtzs z4.d, p0/m, z1.d
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    mov z6.d, #0x7fffffff
+; CHECK-NEXT:    fcmge p1.d, p0/z, z1.d, z2.d
+; CHECK-NEXT:    mov z5.d, x9
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    fcmgt p2.d, p0/z, z1.d, z5.d
+; CHECK-NEXT:    mov z4.d, p1/m, z3.d
+; CHECK-NEXT:    fcmge p1.d, p0/z, z0.d, z2.d
+; CHECK-NEXT:    movprfx z2, z0
+; CHECK-NEXT:    fcvtzs z2.d, p0/m, z0.d
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z2.d, p1/m, z3.d
+; CHECK-NEXT:    fcmgt p1.d, p0/z, z0.d, z5.d
+; CHECK-NEXT:    sel z3.d, p2, z6.d, z4.d
+; CHECK-NEXT:    mov z2.d, p1/m, z6.d
+; CHECK-NEXT:    fcmuo p1.d, p0/z, z1.d, z1.d
+; CHECK-NEXT:    fcmuo p0.d, p0/z, z0.d, z0.d
+; CHECK-NEXT:    mov z3.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    mov z2.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    uzp1 z0.s, z2.s, z3.s
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f64.nxv4i32(<vscale x 4 x double> %f)
+    ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
+; CHECK-LABEL: test_signed_v8f64_v8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-4476578029606273024
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z5.d, #0xffffffff80000000
+; CHECK-NEXT:    movprfx z6, z1
+; CHECK-NEXT:    fcvtzs z6.d, p0/m, z1.d
+; CHECK-NEXT:    mov z24.d, #0x7fffffff
+; CHECK-NEXT:    mov z4.d, x8
+; CHECK-NEXT:    mov x8, #281474972516352
+; CHECK-NEXT:    movk x8, #16863, lsl #48
+; CHECK-NEXT:    fcmge p1.d, p0/z, z1.d, z4.d
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    fcmge p2.d, p0/z, z0.d, z4.d
+; CHECK-NEXT:    mov z6.d, p1/m, z5.d
+; CHECK-NEXT:    not p2.b, p0/z, p2.b
+; CHECK-NEXT:    mov z7.d, x8
+; CHECK-NEXT:    fcmgt p1.d, p0/z, z1.d, z7.d
+; CHECK-NEXT:    mov z6.d, p1/m, z24.d
+; CHECK-NEXT:    fcmuo p1.d, p0/z, z1.d, z1.d
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z0.d
+; CHECK-NEXT:    mov z6.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    mov z1.d, p2/m, z5.d
+; CHECK-NEXT:    fcmgt p2.d, p0/z, z0.d, z7.d
+; CHECK-NEXT:    mov z1.d, p2/m, z24.d
+; CHECK-NEXT:    fcmge p2.d, p0/z, z3.d, z4.d
+; CHECK-NEXT:    fcmuo p1.d, p0/z, z0.d, z0.d
+; CHECK-NEXT:    movprfx z0, z3
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z3.d
+; CHECK-NEXT:    not p2.b, p0/z, p2.b
+; CHECK-NEXT:    mov z1.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, p2/m, z5.d
+; CHECK-NEXT:    fcmge p2.d, p0/z, z2.d, z4.d
+; CHECK-NEXT:    movprfx z4, z2
+; CHECK-NEXT:    fcvtzs z4.d, p0/m, z2.d
+; CHECK-NEXT:    not p2.b, p0/z, p2.b
+; CHECK-NEXT:    fcmgt p1.d, p0/z, z3.d, z7.d
+; CHECK-NEXT:    mov z4.d, p2/m, z5.d
+; CHECK-NEXT:    fcmgt p2.d, p0/z, z2.d, z7.d
+; CHECK-NEXT:    sel z5.d, p1, z24.d, z0.d
+; CHECK-NEXT:    mov z4.d, p2/m, z24.d
+; CHECK-NEXT:    fcmuo p1.d, p0/z, z3.d, z3.d
+; CHECK-NEXT:    fcmuo p0.d, p0/z, z2.d, z2.d
+; CHECK-NEXT:    mov z5.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    mov z4.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    uzp1 z0.s, z1.s, z6.s
+; CHECK-NEXT:    uzp1 z1.s, z4.s, z5.s
+; CHECK-NEXT:    ret
+    %x = call <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f64.nxv8i32(<vscale x 8 x double> %f)
+    ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
+; CHECK-LABEL: test_signed_v4f64_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-4548635623644200960
+; CHECK-NEXT:    mov x9, #281200098803712
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movk x9, #16607, lsl #48
+; CHECK-NEXT:    movprfx z4, z1
+; CHECK-NEXT:    fcvtzs z4.d, p0/m, z1.d
+; CHECK-NEXT:    mov z3.d, #32767 // =0x7fff
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    fcmge p1.d, p0/z, z1.d, z2.d
+; CHECK-NEXT:    mov z5.d, x9
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    fcmgt p2.d, p0/z, z1.d, z5.d
+; CHECK-NEXT:    mov z4.d, p1/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT:    fcmge p1.d, p0/z, z0.d, z2.d
+; CHECK-NEXT:    movprfx z2, z0
+; CHECK-NEXT:    fcvtzs z2.d, p0/m, z0.d
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z2.d, p1/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT:    fcmgt p1.d, p0/z, z0.d, z5.d
+; CHECK-NEXT:    mov z4.d, p2/m, z3.d
+; CHECK-NEXT:    mov z2.d, p1/m, z3.d
+; CHECK-NEXT:    fcmuo p1.d, p0/z, z1.d, z1.d
+; CHECK-NEXT:    fcmuo p0.d, p0/z, z0.d, z0.d
+; CHECK-NEXT:    mov z4.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    mov z2.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    uzp1 z0.s, z2.s, z4.s
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f64.nxv4i16(<vscale x 4 x double> %f)
+    ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
+; CHECK-LABEL: test_signed_v8f64_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-4548635623644200960
+; CHECK-NEXT:    mov x9, #281200098803712
+; CHECK-NEXT:    movk x9, #16607, lsl #48
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z5, z3
+; CHECK-NEXT:    fcvtzs z5.d, p0/m, z3.d
+; CHECK-NEXT:    mov z7.d, #32767 // =0x7fff
+; CHECK-NEXT:    mov z4.d, x8
+; CHECK-NEXT:    fcmge p1.d, p0/z, z3.d, z4.d
+; CHECK-NEXT:    mov z6.d, x9
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    fcmgt p2.d, p0/z, z3.d, z6.d
+; CHECK-NEXT:    mov z5.d, p1/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT:    fcmuo p1.d, p0/z, z3.d, z3.d
+; CHECK-NEXT:    mov z5.d, p2/m, z7.d
+; CHECK-NEXT:    fcmge p2.d, p0/z, z2.d, z4.d
+; CHECK-NEXT:    movprfx z3, z2
+; CHECK-NEXT:    fcvtzs z3.d, p0/m, z2.d
+; CHECK-NEXT:    not p2.b, p0/z, p2.b
+; CHECK-NEXT:    mov z3.d, p2/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT:    fcmgt p2.d, p0/z, z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, p2/m, z7.d
+; CHECK-NEXT:    fcmge p2.d, p0/z, z1.d, z4.d
+; CHECK-NEXT:    mov z5.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    fcmuo p1.d, p0/z, z2.d, z2.d
+; CHECK-NEXT:    movprfx z2, z1
+; CHECK-NEXT:    fcvtzs z2.d, p0/m, z1.d
+; CHECK-NEXT:    not p2.b, p0/z, p2.b
+; CHECK-NEXT:    mov z2.d, p2/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT:    fcmge p2.d, p0/z, z0.d, z4.d
+; CHECK-NEXT:    movprfx z4, z0
+; CHECK-NEXT:    fcvtzs z4.d, p0/m, z0.d
+; CHECK-NEXT:    not p2.b, p0/z, p2.b
+; CHECK-NEXT:    mov z3.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    fcmgt p1.d, p0/z, z1.d, z6.d
+; CHECK-NEXT:    mov z4.d, p2/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT:    fcmgt p2.d, p0/z, z0.d, z6.d
+; CHECK-NEXT:    mov z2.d, p1/m, z7.d
+; CHECK-NEXT:    mov z4.d, p2/m, z7.d
+; CHECK-NEXT:    fcmuo p1.d, p0/z, z1.d, z1.d
+; CHECK-NEXT:    fcmuo p0.d, p0/z, z0.d, z0.d
+; CHECK-NEXT:    mov z2.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    mov z4.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    uzp1 z0.s, z3.s, z5.s
+; CHECK-NEXT:    uzp1 z1.s, z4.s, z2.s
+; CHECK-NEXT:    uzp1 z0.h, z1.h, z0.h
+; CHECK-NEXT:    ret
+    %x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f64.nxv8i16(<vscale x 8 x double> %f)
+    ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f64_v2i64(<vscale x 2 x double> %f) {
+; CHECK-LABEL: test_signed_v2f64_v2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-4332462841530417152
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z2.d, #0x8000000000000000
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mov x8, #4890909195324358655
+; CHECK-NEXT:    fcmge p1.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z0.d
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z1.d, p1/m, z2.d
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    fcmgt p1.d, p0/z, z0.d, z2.d
+; CHECK-NEXT:    mov z2.d, #0x7fffffffffffffff
+; CHECK-NEXT:    mov z1.d, p1/m, z2.d
+; CHECK-NEXT:    fcmuo p0.d, p0/z, z0.d, z0.d
+; CHECK-NEXT:    mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f64.nxv2i64(<vscale x 2 x double> %f)
+    ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f64_v4i64(<vscale x 4 x double> %f) {
+; CHECK-LABEL: test_signed_v4f64_v4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-4332462841530417152
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov x9, #4890909195324358655
+; CHECK-NEXT:    mov z3.d, #0x8000000000000000
+; CHECK-NEXT:    movprfx z4, z0
+; CHECK-NEXT:    fcvtzs z4.d, p0/m, z0.d
+; CHECK-NEXT:    mov z6.d, #0x7fffffffffffffff
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    fcmge p1.d, p0/z, z0.d, z2.d
+; CHECK-NEXT:    mov z5.d, x9
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    fcmgt p2.d, p0/z, z0.d, z5.d
+; CHECK-NEXT:    mov z4.d, p1/m, z3.d
+; CHECK-NEXT:    fcmge p1.d, p0/z, z1.d, z2.d
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    movprfx z2, z1
+; CHECK-NEXT:    fcvtzs z2.d, p0/m, z1.d
+; CHECK-NEXT:    sel z3.d, p1, z3.d, z2.d
+; CHECK-NEXT:    fcmgt p1.d, p0/z, z1.d, z5.d
+; CHECK-NEXT:    sel z2.d, p2, z6.d, z4.d
+; CHECK-NEXT:    mov z3.d, p1/m, z6.d
+; CHECK-NEXT:    fcmuo p1.d, p0/z, z0.d, z0.d
+; CHECK-NEXT:    fcmuo p0.d, p0/z, z1.d, z1.d
+; CHECK-NEXT:    mov z2.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    mov z3.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    mov z1.d, z3.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f64.nxv4i64(<vscale x 4 x double> %f)
+    ret <vscale x 4 x i64> %x
+}
+
+
+; half
+
+declare <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f16.nxv2i32(<vscale x 2 x half>)
+declare <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f16.nxv4i32(<vscale x 4 x half>)
+declare <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f16.nxv8i32(<vscale x 8 x half>)
+declare <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f16.nxv4i16(<vscale x 4 x half>)
+declare <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f16.nxv8i16(<vscale x 8 x half>)
+declare <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f16.nxv2i64(<vscale x 2 x half>)
+declare <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f16.nxv4i64(<vscale x 4 x half>)
+
+define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
+; CHECK-LABEL: test_signed_v2f16_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI14_0
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI14_0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z3.d, #0xffffffff80000000
+; CHECK-NEXT:    ld1rh { z1.d }, p0/z, [x8]
+; CHECK-NEXT:    adrp x8, .LCPI14_1
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI14_1
+; CHECK-NEXT:    ld1rh { z2.d }, p0/z, [x8]
+; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z0.h
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z1.d, p1/m, z3.d
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT:    mov z2.d, #0x7fffffff
+; CHECK-NEXT:    mov z1.d, p1/m, z2.d
+; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
+; CHECK-NEXT:    mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f16.nxv2i32(<vscale x 2 x half> %f)
+    ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
+; CHECK-LABEL: test_signed_v4f16_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI15_0
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI15_0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    adrp x9, .LCPI15_1
+; CHECK-NEXT:    add x9, x9, :lo12:.LCPI15_1
+; CHECK-NEXT:    ld1rh { z1.s }, p0/z, [x8]
+; CHECK-NEXT:    mov w8, #-2147483648
+; CHECK-NEXT:    ld1rh { z2.s }, p0/z, [x9]
+; CHECK-NEXT:    mov z3.s, w8
+; CHECK-NEXT:    mov w8, #2147483647
+; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzs z1.s, p0/m, z0.h
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z1.s, p1/m, z3.s
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
+; CHECK-NEXT:    mov z1.s, p1/m, z2.s
+; CHECK-NEXT:    mov z1.s, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f16.nxv4i32(<vscale x 4 x half> %f)
+    ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
+; CHECK-LABEL: test_signed_v8f16_v8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI16_0
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI16_0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    adrp x9, .LCPI16_1
+; CHECK-NEXT:    add x9, x9, :lo12:.LCPI16_1
+; CHECK-NEXT:    uunpklo z2.s, z0.h
+; CHECK-NEXT:    ld1rh { z1.s }, p0/z, [x8]
+; CHECK-NEXT:    mov w8, #-2147483648
+; CHECK-NEXT:    uunpkhi z6.s, z0.h
+; CHECK-NEXT:    ld1rh { z3.s }, p0/z, [x9]
+; CHECK-NEXT:    movprfx z4, z2
+; CHECK-NEXT:    fcvtzs z4.s, p0/m, z2.h
+; CHECK-NEXT:    mov z5.s, w8
+; CHECK-NEXT:    mov w8, #2147483647
+; CHECK-NEXT:    fcmge p1.h, p0/z, z2.h, z1.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z6.h, z1.h
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    not p2.b, p0/z, p2.b
+; CHECK-NEXT:    mov z4.s, p1/m, z5.s
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z2.h, z3.h
+; CHECK-NEXT:    mov z7.s, w8
+; CHECK-NEXT:    movprfx z0, z6
+; CHECK-NEXT:    fcvtzs z0.s, p0/m, z6.h
+; CHECK-NEXT:    sel z1.s, p2, z5.s, z0.s
+; CHECK-NEXT:    fcmgt p2.h, p0/z, z6.h, z3.h
+; CHECK-NEXT:    sel z0.s, p1, z7.s, z4.s
+; CHECK-NEXT:    mov z1.s, p2/m, z7.s
+; CHECK-NEXT:    fcmuo p1.h, p0/z, z2.h, z2.h
+; CHECK-NEXT:    fcmuo p0.h, p0/z, z6.h, z6.h
+; CHECK-NEXT:    mov z0.s, p1/m, #0 // =0x0
+; CHECK-NEXT:    mov z1.s, p0/m, #0 // =0x0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f16.nxv8i32(<vscale x 8 x half> %f)
+    ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
+; CHECK-LABEL: test_signed_v4f16_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI17_0
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI17_0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1rh { z1.s }, p0/z, [x8]
+; CHECK-NEXT:    adrp x8, .LCPI17_1
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI17_1
+; CHECK-NEXT:    ld1rh { z2.s }, p0/z, [x8]
+; CHECK-NEXT:    mov w8, #32767
+; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzs z1.s, p0/m, z0.h
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z1.s, p1/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    mov z1.s, p1/m, z2.s
+; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
+; CHECK-NEXT:    mov z1.s, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f16.nxv4i16(<vscale x 4 x half> %f)
+    ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
+; CHECK-LABEL: test_signed_v8f16_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI18_0
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI18_0
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ld1rh { z1.h }, p0/z, [x8]
+; CHECK-NEXT:    adrp x8, .LCPI18_1
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI18_1
+; CHECK-NEXT:    ld1rh { z2.h }, p0/z, [x8]
+; CHECK-NEXT:    mov w8, #32767
+; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzs z1.h, p0/m, z0.h
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z1.h, p1/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    mov z1.h, p1/m, z2.h
+; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
+; CHECK-NEXT:    mov z1.h, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f16.nxv8i16(<vscale x 8 x half> %f)
+    ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
+; CHECK-LABEL: test_signed_v2f16_v2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI19_0
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI19_0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z3.d, #0x8000000000000000
+; CHECK-NEXT:    ld1rh { z1.d }, p0/z, [x8]
+; CHECK-NEXT:    adrp x8, .LCPI19_1
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI19_1
+; CHECK-NEXT:    ld1rh { z2.d }, p0/z, [x8]
+; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z0.h
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z1.d, p1/m, z3.d
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT:    mov z2.d, #0x7fffffffffffffff
+; CHECK-NEXT:    mov z1.d, p1/m, z2.d
+; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
+; CHECK-NEXT:    mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f16.nxv2i64(<vscale x 2 x half> %f)
+    ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
+; CHECK-LABEL: test_signed_v4f16_v4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI20_0
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI20_0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uunpklo z4.d, z0.s
+; CHECK-NEXT:    mov z3.d, #0x8000000000000000
+; CHECK-NEXT:    uunpkhi z5.d, z0.s
+; CHECK-NEXT:    ld1rh { z1.d }, p0/z, [x8]
+; CHECK-NEXT:    adrp x8, .LCPI20_1
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI20_1
+; CHECK-NEXT:    mov z6.d, #0x7fffffffffffffff
+; CHECK-NEXT:    ld1rh { z2.d }, p0/z, [x8]
+; CHECK-NEXT:    fcmge p1.h, p0/z, z4.h, z1.h
+; CHECK-NEXT:    movprfx z0, z4
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z4.h
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z0.d, p1/m, z3.d
+; CHECK-NEXT:    fcmge p1.h, p0/z, z5.h, z1.h
+; CHECK-NEXT:    movprfx z1, z5
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z5.h
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    fcmgt p2.h, p0/z, z4.h, z2.h
+; CHECK-NEXT:    mov z1.d, p1/m, z3.d
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z5.h, z2.h
+; CHECK-NEXT:    mov z0.d, p2/m, z6.d
+; CHECK-NEXT:    mov z1.d, p1/m, z6.d
+; CHECK-NEXT:    fcmuo p1.h, p0/z, z4.h, z4.h
+; CHECK-NEXT:    fcmuo p0.h, p0/z, z5.h, z5.h
+; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f16.nxv4i64(<vscale x 4 x half> %f)
+    ret <vscale x 4 x i64> %x
+}
+

diff --git a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll
new file mode 100644
index 0000000000000..fe68fe30315ff
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll
@@ -0,0 +1,556 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s
+
+; Float
+
+declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f32.nxv2i32(<vscale x 2 x float>)
+declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f32.nxv4i32(<vscale x 4 x float>)
+declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f32.nxv8i32(<vscale x 8 x float>)
+declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f32.nxv4i16(<vscale x 4 x float>)
+declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f32.nxv8i16(<vscale x 8 x float>)
+declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f32.nxv2i64(<vscale x 2 x float>)
+declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f32.nxv4i64(<vscale x 4 x float>)
+
+define <vscale x 2 x i32> @test_signed_v2f32_v2i32(<vscale x 2 x float> %f) {
+; CHECK-LABEL: test_signed_v2f32_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1333788671
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    fcmgt p2.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.s
+; CHECK-NEXT:    not p0.b, p0/z, p1.b
+; CHECK-NEXT:    mov z1.d, #0xffffffff
+; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, p2/m, z1.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f32.nxv2i32(<vscale x 2 x float> %f)
+    ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_signed_v4f32_v4i32(<vscale x 4 x float> %f) {
+; CHECK-LABEL: test_signed_v4f32_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1333788671
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzu z1.s, p0/m, z0.s
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    mov z1.s, p1/m, #0 // =0x0
+; CHECK-NEXT:    fcmgt p0.s, p0/z, z0.s, z2.s
+; CHECK-NEXT:    mov z1.s, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f32.nxv4i32(<vscale x 4 x float> %f)
+    ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_signed_v8f32_v8i32(<vscale x 8 x float> %f) {
+; CHECK-LABEL: test_signed_v8f32_v8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1333788671
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT:    fcmge p2.s, p0/z, z1.s, #0.0
+; CHECK-NEXT:    movprfx z2, z0
+; CHECK-NEXT:    fcvtzu z2.s, p0/m, z0.s
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z4.s, w8
+; CHECK-NEXT:    movprfx z3, z1
+; CHECK-NEXT:    fcvtzu z3.s, p0/m, z1.s
+; CHECK-NEXT:    not p2.b, p0/z, p2.b
+; CHECK-NEXT:    mov z2.s, p1/m, #0 // =0x0
+; CHECK-NEXT:    fcmgt p1.s, p0/z, z0.s, z4.s
+; CHECK-NEXT:    fcmgt p0.s, p0/z, z1.s, z4.s
+; CHECK-NEXT:    mov z3.s, p2/m, #0 // =0x0
+; CHECK-NEXT:    mov z2.s, p1/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z3.s, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    mov z1.d, z3.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f32.nxv8i32(<vscale x 8 x float> %f)
+    ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f32_v4i16(<vscale x 4 x float> %f) {
+; CHECK-LABEL: test_signed_v4f32_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #65280
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movk w8, #18303, lsl #16
+; CHECK-NEXT:    mov w9, #65535
+; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    fcmgt p2.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
+; CHECK-NEXT:    not p0.b, p0/z, p1.b
+; CHECK-NEXT:    mov z1.s, w9
+; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.s, p2/m, z1.s
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f32.nxv4i16(<vscale x 4 x float> %f)
+    ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) {
+; CHECK-LABEL: test_signed_v8f32_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #65280
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movk w8, #18303, lsl #16
+; CHECK-NEXT:    fcmge p1.s, p0/z, z1.s, #0.0
+; CHECK-NEXT:    movprfx z3, z1
+; CHECK-NEXT:    fcvtzu z3.s, p0/m, z1.s
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z3.s, p1/m, #0 // =0x0
+; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    mov w8, #65535
+; CHECK-NEXT:    fcmgt p2.s, p0/z, z1.s, z2.s
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzu z1.s, p0/m, z0.s
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    fcmgt p0.s, p0/z, z0.s, z2.s
+; CHECK-NEXT:    mov z0.s, w8
+; CHECK-NEXT:    mov z1.s, p1/m, #0 // =0x0
+; CHECK-NEXT:    sel z2.s, p2, z0.s, z3.s
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT:    ret
+    %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f32.nxv8i16(<vscale x 8 x float> %f)
+    ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f32_v2i64(<vscale x 2 x float> %f) {
+; CHECK-LABEL: test_signed_v2f32_v2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1602224127
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzu z1.d, p0/m, z0.s
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    mov z1.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    fcmgt p0.s, p0/z, z0.s, z2.s
+; CHECK-NEXT:    mov z1.d, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f32.nxv2i64(<vscale x 2 x float> %f)
+    ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f32_v4i64(<vscale x 4 x float> %f) {
+; CHECK-LABEL: test_signed_v4f32_v4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1602224127
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uunpklo z1.d, z0.s
+; CHECK-NEXT:    uunpkhi z3.d, z0.s
+; CHECK-NEXT:    fcmge p2.s, p0/z, z1.s, #0.0
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z1.s
+; CHECK-NEXT:    not p2.b, p0/z, p2.b
+; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    mov z0.d, p2/m, #0 // =0x0
+; CHECK-NEXT:    fcmge p2.s, p0/z, z3.s, #0.0
+; CHECK-NEXT:    fcmgt p1.s, p0/z, z1.s, z2.s
+; CHECK-NEXT:    movprfx z1, z3
+; CHECK-NEXT:    fcvtzu z1.d, p0/m, z3.s
+; CHECK-NEXT:    not p2.b, p0/z, p2.b
+; CHECK-NEXT:    fcmgt p0.s, p0/z, z3.s, z2.s
+; CHECK-NEXT:    mov z1.d, p2/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, p1/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z1.d, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f32.nxv4i64(<vscale x 4 x float> %f)
+    ret <vscale x 4 x i64> %x
+}
+
+; Double
+
+declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f64.nxv2i32(<vscale x 2 x double>)
+declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f64.nxv4i32(<vscale x 4 x double>)
+declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f64.nxv8i32(<vscale x 8 x double>)
+declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f64.nxv4i16(<vscale x 4 x double>)
+declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f64.nxv8i16(<vscale x 8 x double>)
+declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f64.nxv2i64(<vscale x 2 x double>)
+declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f64.nxv4i64(<vscale x 4 x double>)
+
+define <vscale x 2 x i32> @test_signed_v2f64_v2i32(<vscale x 2 x double> %f) {
+; CHECK-LABEL: test_signed_v2f64_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #281474974613504
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movk x8, #16879, lsl #48
+; CHECK-NEXT:    fcmge p1.d, p0/z, z0.d, #0.0
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    fcmgt p2.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT:    not p0.b, p0/z, p1.b
+; CHECK-NEXT:    mov z1.d, #0xffffffff
+; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, p2/m, z1.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f64.nxv2i32(<vscale x 2 x double> %f)
+    ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
+; CHECK-LABEL: test_signed_v4f64_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #281474974613504
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movk x8, #16879, lsl #48
+; CHECK-NEXT:    fcmge p1.d, p0/z, z1.d, #0.0
+; CHECK-NEXT:    movprfx z3, z1
+; CHECK-NEXT:    fcvtzu z3.d, p0/m, z1.d
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z3.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    fcmge p1.d, p0/z, z0.d, #0.0
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    fcmgt p2.d, p0/z, z1.d, z2.d
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzu z1.d, p0/m, z0.d
+; CHECK-NEXT:    fcmgt p0.d, p0/z, z0.d, z2.d
+; CHECK-NEXT:    mov z0.d, #0xffffffff
+; CHECK-NEXT:    mov z1.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    sel z2.d, p2, z0.d, z3.d
+; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z2.s
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f64.nxv4i32(<vscale x 4 x double> %f)
+    ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
+; CHECK-LABEL: test_signed_v8f64_v8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #281474974613504
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movk x8, #16879, lsl #48
+; CHECK-NEXT:    fcmge p1.d, p0/z, z1.d, #0.0
+; CHECK-NEXT:    movprfx z5, z1
+; CHECK-NEXT:    fcvtzu z5.d, p0/m, z1.d
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z5.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    fcmge p1.d, p0/z, z0.d, #0.0
+; CHECK-NEXT:    mov z4.d, x8
+; CHECK-NEXT:    movprfx z6, z0
+; CHECK-NEXT:    fcvtzu z6.d, p0/m, z0.d
+; CHECK-NEXT:    fcmgt p2.d, p0/z, z1.d, z4.d
+; CHECK-NEXT:    mov z1.d, #0xffffffff
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z6.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    fcmgt p1.d, p0/z, z0.d, z4.d
+; CHECK-NEXT:    sel z0.d, p2, z1.d, z5.d
+; CHECK-NEXT:    fcmge p2.d, p0/z, z3.d, #0.0
+; CHECK-NEXT:    sel z5.d, p1, z1.d, z6.d
+; CHECK-NEXT:    fcmgt p1.d, p0/z, z3.d, z4.d
+; CHECK-NEXT:    fcvtzu z3.d, p0/m, z3.d
+; CHECK-NEXT:    not p2.b, p0/z, p2.b
+; CHECK-NEXT:    mov z3.d, p2/m, #0 // =0x0
+; CHECK-NEXT:    fcmge p2.d, p0/z, z2.d, #0.0
+; CHECK-NEXT:    movprfx z6, z2
+; CHECK-NEXT:    fcvtzu z6.d, p0/m, z2.d
+; CHECK-NEXT:    not p2.b, p0/z, p2.b
+; CHECK-NEXT:    fcmgt p0.d, p0/z, z2.d, z4.d
+; CHECK-NEXT:    mov z6.d, p2/m, #0 // =0x0
+; CHECK-NEXT:    sel z2.d, p1, z1.d, z3.d
+; CHECK-NEXT:    sel z1.d, p0, z1.d, z6.d
+; CHECK-NEXT:    uzp1 z0.s, z5.s, z0.s
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z2.s
+; CHECK-NEXT:    ret
+    %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f64.nxv8i32(<vscale x 8 x double> %f)
+    ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
+; CHECK-LABEL: test_signed_v4f64_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #281337537757184
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movk x8, #16623, lsl #48
+; CHECK-NEXT:    fcmge p1.d, p0/z, z1.d, #0.0
+; CHECK-NEXT:    movprfx z3, z1
+; CHECK-NEXT:    fcvtzu z3.d, p0/m, z1.d
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z3.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    fcmge p1.d, p0/z, z0.d, #0.0
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    fcmgt p2.d, p0/z, z1.d, z2.d
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzu z1.d, p0/m, z0.d
+; CHECK-NEXT:    fcmgt p0.d, p0/z, z0.d, z2.d
+; CHECK-NEXT:    mov z0.d, #65535 // =0xffff
+; CHECK-NEXT:    mov z1.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    sel z2.d, p2, z0.d, z3.d
+; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z2.s
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f64.nxv4i16(<vscale x 4 x double> %f)
+    ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
+; CHECK-LABEL: test_signed_v8f64_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #281337537757184
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movk x8, #16623, lsl #48
+; CHECK-NEXT:    fcmge p1.d, p0/z, z3.d, #0.0
+; CHECK-NEXT:    movprfx z5, z3
+; CHECK-NEXT:    fcvtzu z5.d, p0/m, z3.d
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z5.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    fcmge p1.d, p0/z, z2.d, #0.0
+; CHECK-NEXT:    mov z4.d, x8
+; CHECK-NEXT:    movprfx z6, z2
+; CHECK-NEXT:    fcvtzu z6.d, p0/m, z2.d
+; CHECK-NEXT:    fcmgt p2.d, p0/z, z3.d, z4.d
+; CHECK-NEXT:    mov z3.d, #65535 // =0xffff
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z6.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    fcmgt p1.d, p0/z, z2.d, z4.d
+; CHECK-NEXT:    sel z2.d, p2, z3.d, z5.d
+; CHECK-NEXT:    fcmge p2.d, p0/z, z1.d, #0.0
+; CHECK-NEXT:    sel z5.d, p1, z3.d, z6.d
+; CHECK-NEXT:    fcmgt p1.d, p0/z, z1.d, z4.d
+; CHECK-NEXT:    fcvtzu z1.d, p0/m, z1.d
+; CHECK-NEXT:    not p2.b, p0/z, p2.b
+; CHECK-NEXT:    mov z1.d, p2/m, #0 // =0x0
+; CHECK-NEXT:    fcmge p2.d, p0/z, z0.d, #0.0
+; CHECK-NEXT:    movprfx z6, z0
+; CHECK-NEXT:    fcvtzu z6.d, p0/m, z0.d
+; CHECK-NEXT:    not p2.b, p0/z, p2.b
+; CHECK-NEXT:    fcmgt p0.d, p0/z, z0.d, z4.d
+; CHECK-NEXT:    mov z6.d, p2/m, #0 // =0x0
+; CHECK-NEXT:    sel z0.d, p1, z3.d, z1.d
+; CHECK-NEXT:    sel z1.d, p0, z3.d, z6.d
+; CHECK-NEXT:    uzp1 z2.s, z5.s, z2.s
+; CHECK-NEXT:    uzp1 z0.s, z1.s, z0.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT:    ret
+    %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f64.nxv8i16(<vscale x 8 x double> %f)
+    ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f64_v2i64(<vscale x 2 x double> %f) {
+; CHECK-LABEL: test_signed_v2f64_v2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #4895412794951729151
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcmge p1.d, p0/z, z0.d, #0.0
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzu z1.d, p0/m, z0.d
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    mov z1.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    fcmgt p0.d, p0/z, z0.d, z2.d
+; CHECK-NEXT:    mov z1.d, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f64.nxv2i64(<vscale x 2 x double> %f)
+    ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f64_v4i64(<vscale x 4 x double> %f) {
+; CHECK-LABEL: test_signed_v4f64_v4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #4895412794951729151
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcmge p1.d, p0/z, z0.d, #0.0
+; CHECK-NEXT:    fcmge p2.d, p0/z, z1.d, #0.0
+; CHECK-NEXT:    movprfx z2, z0
+; CHECK-NEXT:    fcvtzu z2.d, p0/m, z0.d
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z4.d, x8
+; CHECK-NEXT:    movprfx z3, z1
+; CHECK-NEXT:    fcvtzu z3.d, p0/m, z1.d
+; CHECK-NEXT:    not p2.b, p0/z, p2.b
+; CHECK-NEXT:    mov z2.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    fcmgt p1.d, p0/z, z0.d, z4.d
+; CHECK-NEXT:    fcmgt p0.d, p0/z, z1.d, z4.d
+; CHECK-NEXT:    mov z3.d, p2/m, #0 // =0x0
+; CHECK-NEXT:    mov z2.d, p1/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z3.d, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    mov z1.d, z3.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f64.nxv4i64(<vscale x 4 x double> %f)
+    ret <vscale x 4 x i64> %x
+}
+
+
+; half
+
+declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f16.nxv2i32(<vscale x 2 x half>)
+declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f16.nxv4i32(<vscale x 4 x half>)
+declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f16.nxv8i32(<vscale x 8 x half>)
+declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f16.nxv4i16(<vscale x 4 x half>)
+declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f16.nxv8i16(<vscale x 8 x half>)
+declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f16.nxv2i64(<vscale x 2 x half>)
+declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f16.nxv4i64(<vscale x 4 x half>)
+
+define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
+; CHECK-LABEL: test_signed_v2f16_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI14_0
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI14_0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, #0.0
+; CHECK-NEXT:    ld1rh { z1.d }, p0/z, [x8]
+; CHECK-NEXT:    movprfx z2, z0
+; CHECK-NEXT:    fcvtzu z2.d, p0/m, z0.h
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z2.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    fcmgt p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    mov z0.d, #0xffffffff
+; CHECK-NEXT:    sel z0.d, p0, z0.d, z2.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f16.nxv2i32(<vscale x 2 x half> %f)
+    ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
+; CHECK-LABEL: test_signed_v4f16_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI15_0
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI15_0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, #0.0
+; CHECK-NEXT:    ld1rh { z2.s }, p0/z, [x8]
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzu z1.s, p0/m, z0.h
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z1.s, p1/m, #0 // =0x0
+; CHECK-NEXT:    fcmgt p0.h, p0/z, z0.h, z2.h
+; CHECK-NEXT:    mov z1.s, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f16.nxv4i32(<vscale x 4 x half> %f)
+    ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
+; CHECK-LABEL: test_signed_v8f16_v8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI16_0
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI16_0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    uunpklo z3.s, z0.h
+; CHECK-NEXT:    uunpkhi z4.s, z0.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z3.h, #0.0
+; CHECK-NEXT:    ld1rh { z2.s }, p0/z, [x8]
+; CHECK-NEXT:    fcmge p2.h, p0/z, z4.h, #0.0
+; CHECK-NEXT:    movprfx z0, z3
+; CHECK-NEXT:    fcvtzu z0.s, p0/m, z3.h
+; CHECK-NEXT:    movprfx z1, z4
+; CHECK-NEXT:    fcvtzu z1.s, p0/m, z4.h
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    not p2.b, p0/z, p2.b
+; CHECK-NEXT:    mov z0.s, p1/m, #0 // =0x0
+; CHECK-NEXT:    mov z1.s, p2/m, #0 // =0x0
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z3.h, z2.h
+; CHECK-NEXT:    fcmgt p0.h, p0/z, z4.h, z2.h
+; CHECK-NEXT:    mov z0.s, p1/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z1.s, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    ret
+    %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f16.nxv8i32(<vscale x 8 x half> %f)
+    ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
+; CHECK-LABEL: test_signed_v4f16_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI17_0
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI17_0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, #0.0
+; CHECK-NEXT:    ld1rh { z1.s }, p0/z, [x8]
+; CHECK-NEXT:    mov w8, #65535
+; CHECK-NEXT:    movprfx z2, z0
+; CHECK-NEXT:    fcvtzu z2.s, p0/m, z0.h
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z2.s, p1/m, #0 // =0x0
+; CHECK-NEXT:    fcmgt p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    mov z0.s, w8
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z2.s
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f16.nxv4i16(<vscale x 4 x half> %f)
+    ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
+; CHECK-LABEL: test_signed_v8f16_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI18_0
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI18_0
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, #0.0
+; CHECK-NEXT:    ld1rh { z2.h }, p0/z, [x8]
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzu z1.h, p0/m, z0.h
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z1.h, p1/m, #0 // =0x0
+; CHECK-NEXT:    fcmgt p0.h, p0/z, z0.h, z2.h
+; CHECK-NEXT:    mov z1.h, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f16.nxv8i16(<vscale x 8 x half> %f)
+    ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
+; CHECK-LABEL: test_signed_v2f16_v2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI19_0
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI19_0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, #0.0
+; CHECK-NEXT:    ld1rh { z2.d }, p0/z, [x8]
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzu z1.d, p0/m, z0.h
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    mov z1.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    fcmgt p0.h, p0/z, z0.h, z2.h
+; CHECK-NEXT:    mov z1.d, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+    %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f16.nxv2i64(<vscale x 2 x half> %f)
+    ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
+; CHECK-LABEL: test_signed_v4f16_v4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI20_0
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI20_0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uunpklo z3.d, z0.s
+; CHECK-NEXT:    uunpkhi z4.d, z0.s
+; CHECK-NEXT:    fcmge p1.h, p0/z, z3.h, #0.0
+; CHECK-NEXT:    ld1rh { z2.d }, p0/z, [x8]
+; CHECK-NEXT:    fcmge p2.h, p0/z, z4.h, #0.0
+; CHECK-NEXT:    movprfx z0, z3
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z3.h
+; CHECK-NEXT:    movprfx z1, z4
+; CHECK-NEXT:    fcvtzu z1.d, p0/m, z4.h
+; CHECK-NEXT:    not p1.b, p0/z, p1.b
+; CHECK-NEXT:    not p2.b, p0/z, p2.b
+; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
+; CHECK-NEXT:    mov z1.d, p2/m, #0 // =0x0
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z3.h, z2.h
+; CHECK-NEXT:    fcmgt p0.h, p0/z, z4.h, z2.h
+; CHECK-NEXT:    mov z0.d, p1/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z1.d, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f16.nxv4i64(<vscale x 4 x half> %f)
+    ret <vscale x 4 x i64> %x
+}
+

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll
new file mode 100644
index 0000000000000..ba6f3204a3e47
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll
@@ -0,0 +1,952 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK32
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK64
+
+; Float
+
+declare <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f32.nxv2i32(<vscale x 2 x float>)
+declare <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f32.nxv4i32(<vscale x 4 x float>)
+declare <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f32.nxv8i32(<vscale x 8 x float>)
+declare <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f32.nxv4i16(<vscale x 4 x float>)
+declare <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f32.nxv8i16(<vscale x 8 x float>)
+declare <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f32.nxv2i64(<vscale x 2 x float>)
+declare <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f32.nxv4i64(<vscale x 4 x float>)
+
+define <vscale x 2 x i32> @test_signed_v2f32_v2i32(<vscale x 2 x float> %f) {
+; CHECK32-LABEL: test_signed_v2f32_v2i32:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    lui a0, %hi(.LCPI0_0)
+; CHECK32-NEXT:    flw ft0, %lo(.LCPI0_0)(a0)
+; CHECK32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK32-NEXT:    vmfge.vf v9, v8, ft0
+; CHECK32-NEXT:    vmnot.m v0, v9
+; CHECK32-NEXT:    lui a0, %hi(.LCPI0_1)
+; CHECK32-NEXT:    flw ft0, %lo(.LCPI0_1)(a0)
+; CHECK32-NEXT:    vfcvt.rtz.x.f.v v9, v8
+; CHECK32-NEXT:    lui a0, 524288
+; CHECK32-NEXT:    vmerge.vxm v9, v9, a0, v0
+; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT:    addi a0, a0, -1
+; CHECK32-NEXT:    vmfne.vv v8, v8, v8
+; CHECK32-NEXT:    vmerge.vxm v9, v9, a0, v0
+; CHECK32-NEXT:    vmv.v.v v0, v8
+; CHECK32-NEXT:    vmerge.vim v8, v9, 0, v0
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v2f32_v2i32:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI0_0)
+; CHECK64-NEXT:    flw ft0, %lo(.LCPI0_0)(a0)
+; CHECK64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK64-NEXT:    vmfge.vf v9, v8, ft0
+; CHECK64-NEXT:    vmnot.m v0, v9
+; CHECK64-NEXT:    vfcvt.rtz.x.f.v v9, v8
+; CHECK64-NEXT:    lui a0, %hi(.LCPI0_1)
+; CHECK64-NEXT:    flw ft0, %lo(.LCPI0_1)(a0)
+; CHECK64-NEXT:    li a0, 1
+; CHECK64-NEXT:    slli a0, a0, 31
+; CHECK64-NEXT:    vmerge.vxm v9, v9, a0, v0
+; CHECK64-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT:    lui a0, 524288
+; CHECK64-NEXT:    addiw a0, a0, -1
+; CHECK64-NEXT:    vmfne.vv v8, v8, v8
+; CHECK64-NEXT:    vmerge.vxm v9, v9, a0, v0
+; CHECK64-NEXT:    vmv.v.v v0, v8
+; CHECK64-NEXT:    vmerge.vim v8, v9, 0, v0
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f32.nxv2i32(<vscale x 2 x float> %f)
+    ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_signed_v4f32_v4i32(<vscale x 4 x float> %f) {
+; CHECK32-LABEL: test_signed_v4f32_v4i32:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    lui a0, %hi(.LCPI1_0)
+; CHECK32-NEXT:    flw ft0, %lo(.LCPI1_0)(a0)
+; CHECK32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK32-NEXT:    vmfge.vf v10, v8, ft0
+; CHECK32-NEXT:    vmnot.m v0, v10
+; CHECK32-NEXT:    lui a0, %hi(.LCPI1_1)
+; CHECK32-NEXT:    flw ft0, %lo(.LCPI1_1)(a0)
+; CHECK32-NEXT:    vfcvt.rtz.x.f.v v10, v8
+; CHECK32-NEXT:    lui a0, 524288
+; CHECK32-NEXT:    vmerge.vxm v12, v10, a0, v0
+; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT:    addi a0, a0, -1
+; CHECK32-NEXT:    vmfne.vv v10, v8, v8
+; CHECK32-NEXT:    vmerge.vxm v8, v12, a0, v0
+; CHECK32-NEXT:    vmv1r.v v0, v10
+; CHECK32-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v4f32_v4i32:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI1_0)
+; CHECK64-NEXT:    flw ft0, %lo(.LCPI1_0)(a0)
+; CHECK64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK64-NEXT:    vmfge.vf v10, v8, ft0
+; CHECK64-NEXT:    vmnot.m v0, v10
+; CHECK64-NEXT:    vfcvt.rtz.x.f.v v10, v8
+; CHECK64-NEXT:    lui a0, %hi(.LCPI1_1)
+; CHECK64-NEXT:    flw ft0, %lo(.LCPI1_1)(a0)
+; CHECK64-NEXT:    li a0, 1
+; CHECK64-NEXT:    slli a0, a0, 31
+; CHECK64-NEXT:    vmerge.vxm v12, v10, a0, v0
+; CHECK64-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT:    lui a0, 524288
+; CHECK64-NEXT:    addiw a0, a0, -1
+; CHECK64-NEXT:    vmfne.vv v10, v8, v8
+; CHECK64-NEXT:    vmerge.vxm v8, v12, a0, v0
+; CHECK64-NEXT:    vmv1r.v v0, v10
+; CHECK64-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f32.nxv4i32(<vscale x 4 x float> %f)
+    ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_signed_v8f32_v8i32(<vscale x 8 x float> %f) {
+; CHECK32-LABEL: test_signed_v8f32_v8i32:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    lui a0, %hi(.LCPI2_0)
+; CHECK32-NEXT:    flw ft0, %lo(.LCPI2_0)(a0)
+; CHECK32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
+; CHECK32-NEXT:    vmfge.vf v12, v8, ft0
+; CHECK32-NEXT:    vmnot.m v0, v12
+; CHECK32-NEXT:    lui a0, %hi(.LCPI2_1)
+; CHECK32-NEXT:    flw ft0, %lo(.LCPI2_1)(a0)
+; CHECK32-NEXT:    vfcvt.rtz.x.f.v v12, v8
+; CHECK32-NEXT:    lui a0, 524288
+; CHECK32-NEXT:    vmerge.vxm v16, v12, a0, v0
+; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT:    addi a0, a0, -1
+; CHECK32-NEXT:    vmfne.vv v12, v8, v8
+; CHECK32-NEXT:    vmerge.vxm v8, v16, a0, v0
+; CHECK32-NEXT:    vmv1r.v v0, v12
+; CHECK32-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v8f32_v8i32:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI2_0)
+; CHECK64-NEXT:    flw ft0, %lo(.LCPI2_0)(a0)
+; CHECK64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
+; CHECK64-NEXT:    vmfge.vf v12, v8, ft0
+; CHECK64-NEXT:    vmnot.m v0, v12
+; CHECK64-NEXT:    vfcvt.rtz.x.f.v v12, v8
+; CHECK64-NEXT:    lui a0, %hi(.LCPI2_1)
+; CHECK64-NEXT:    flw ft0, %lo(.LCPI2_1)(a0)
+; CHECK64-NEXT:    li a0, 1
+; CHECK64-NEXT:    slli a0, a0, 31
+; CHECK64-NEXT:    vmerge.vxm v16, v12, a0, v0
+; CHECK64-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT:    lui a0, 524288
+; CHECK64-NEXT:    addiw a0, a0, -1
+; CHECK64-NEXT:    vmfne.vv v12, v8, v8
+; CHECK64-NEXT:    vmerge.vxm v8, v16, a0, v0
+; CHECK64-NEXT:    vmv1r.v v0, v12
+; CHECK64-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f32.nxv8i32(<vscale x 8 x float> %f)
+    ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f32_v4i16(<vscale x 4 x float> %f) {
+; CHECK-LABEL: test_signed_v4f32_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI3_0)(a0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI3_1)
+; CHECK-NEXT:    flw ft1, %lo(.LCPI3_1)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vfmax.vf v10, v8, ft0
+; CHECK-NEXT:    vfmin.vf v10, v10, ft1
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vmerge.vim v8, v12, 0, v0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f32.nxv4i16(<vscale x 4 x float> %f)
+    ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) {
+; CHECK-LABEL: test_signed_v8f32_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI4_0)(a0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI4_1)
+; CHECK-NEXT:    flw ft1, %lo(.LCPI4_1)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vfmax.vf v12, v8, ft0
+; CHECK-NEXT:    vfmin.vf v12, v12, ft1
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vmerge.vim v8, v16, 0, v0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f32.nxv8i16(<vscale x 8 x float> %f)
+    ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f32_v2i64(<vscale x 2 x float> %f) {
+; CHECK32-LABEL: test_signed_v2f32_v2i64:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    addi sp, sp, -16
+; CHECK32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK32-NEXT:    lui a0, 524288
+; CHECK32-NEXT:    sw a0, 12(sp)
+; CHECK32-NEXT:    sw zero, 8(sp)
+; CHECK32-NEXT:    li a1, -1
+; CHECK32-NEXT:    sw a1, 8(sp)
+; CHECK32-NEXT:    lui a1, %hi(.LCPI5_0)
+; CHECK32-NEXT:    flw ft0, %lo(.LCPI5_0)(a1)
+; CHECK32-NEXT:    addi a0, a0, -1
+; CHECK32-NEXT:    sw a0, 12(sp)
+; CHECK32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK32-NEXT:    vmfge.vf v9, v8, ft0
+; CHECK32-NEXT:    addi a0, sp, 8
+; CHECK32-NEXT:    vlse64.v v10, (a0), zero
+; CHECK32-NEXT:    vmnot.m v0, v9
+; CHECK32-NEXT:    vfwcvt.rtz.x.f.v v12, v8
+; CHECK32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK32-NEXT:    vmerge.vvm v10, v12, v10, v0
+; CHECK32-NEXT:    lui a1, %hi(.LCPI5_1)
+; CHECK32-NEXT:    flw ft0, %lo(.LCPI5_1)(a1)
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK32-NEXT:    vlse64.v v12, (a0), zero
+; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK32-NEXT:    vmerge.vvm v10, v10, v12, v0
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK32-NEXT:    vmfne.vv v0, v8, v8
+; CHECK32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK32-NEXT:    vmerge.vim v8, v10, 0, v0
+; CHECK32-NEXT:    addi sp, sp, 16
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v2f32_v2i64:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI5_0)
+; CHECK64-NEXT:    flw ft0, %lo(.LCPI5_0)(a0)
+; CHECK64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK64-NEXT:    vmfge.vf v9, v8, ft0
+; CHECK64-NEXT:    vmnot.m v0, v9
+; CHECK64-NEXT:    vfwcvt.rtz.x.f.v v10, v8
+; CHECK64-NEXT:    li a0, -1
+; CHECK64-NEXT:    lui a1, %hi(.LCPI5_1)
+; CHECK64-NEXT:    flw ft0, %lo(.LCPI5_1)(a1)
+; CHECK64-NEXT:    slli a1, a0, 63
+; CHECK64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK64-NEXT:    vmerge.vxm v10, v10, a1, v0
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK64-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT:    srli a0, a0, 1
+; CHECK64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK64-NEXT:    vmerge.vxm v10, v10, a0, v0
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK64-NEXT:    vmfne.vv v0, v8, v8
+; CHECK64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK64-NEXT:    vmerge.vim v8, v10, 0, v0
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f32.nxv2i64(<vscale x 2 x float> %f)
+    ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f32_v4i64(<vscale x 4 x float> %f) {
+; CHECK32-LABEL: test_signed_v4f32_v4i64:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    addi sp, sp, -16
+; CHECK32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK32-NEXT:    lui a0, 524288
+; CHECK32-NEXT:    sw a0, 12(sp)
+; CHECK32-NEXT:    sw zero, 8(sp)
+; CHECK32-NEXT:    li a1, -1
+; CHECK32-NEXT:    sw a1, 8(sp)
+; CHECK32-NEXT:    lui a1, %hi(.LCPI6_0)
+; CHECK32-NEXT:    flw ft0, %lo(.LCPI6_0)(a1)
+; CHECK32-NEXT:    addi a0, a0, -1
+; CHECK32-NEXT:    sw a0, 12(sp)
+; CHECK32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK32-NEXT:    vmfge.vf v10, v8, ft0
+; CHECK32-NEXT:    addi a0, sp, 8
+; CHECK32-NEXT:    vlse64.v v12, (a0), zero
+; CHECK32-NEXT:    vmnot.m v0, v10
+; CHECK32-NEXT:    vfwcvt.rtz.x.f.v v16, v8
+; CHECK32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK32-NEXT:    vmerge.vvm v12, v16, v12, v0
+; CHECK32-NEXT:    lui a1, %hi(.LCPI6_1)
+; CHECK32-NEXT:    flw ft0, %lo(.LCPI6_1)(a1)
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK32-NEXT:    vlse64.v v16, (a0), zero
+; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK32-NEXT:    vmerge.vvm v12, v12, v16, v0
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK32-NEXT:    vmfne.vv v0, v8, v8
+; CHECK32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK32-NEXT:    vmerge.vim v8, v12, 0, v0
+; CHECK32-NEXT:    addi sp, sp, 16
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v4f32_v4i64:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI6_0)
+; CHECK64-NEXT:    flw ft0, %lo(.LCPI6_0)(a0)
+; CHECK64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK64-NEXT:    vmfge.vf v10, v8, ft0
+; CHECK64-NEXT:    vmnot.m v0, v10
+; CHECK64-NEXT:    vfwcvt.rtz.x.f.v v12, v8
+; CHECK64-NEXT:    li a0, -1
+; CHECK64-NEXT:    lui a1, %hi(.LCPI6_1)
+; CHECK64-NEXT:    flw ft0, %lo(.LCPI6_1)(a1)
+; CHECK64-NEXT:    slli a1, a0, 63
+; CHECK64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK64-NEXT:    vmerge.vxm v12, v12, a1, v0
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK64-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT:    srli a0, a0, 1
+; CHECK64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK64-NEXT:    vmerge.vxm v12, v12, a0, v0
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK64-NEXT:    vmfne.vv v0, v8, v8
+; CHECK64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK64-NEXT:    vmerge.vim v8, v12, 0, v0
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f32.nxv4i64(<vscale x 4 x float> %f)
+    ret <vscale x 4 x i64> %x
+}
+
+; Double
+
+declare <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f64.nxv2i32(<vscale x 2 x double>)
+declare <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f64.nxv4i32(<vscale x 4 x double>)
+declare <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f64.nxv8i32(<vscale x 8 x double>)
+declare <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f64.nxv4i16(<vscale x 4 x double>)
+declare <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f64.nxv8i16(<vscale x 8 x double>)
+declare <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f64.nxv2i64(<vscale x 2 x double>)
+declare <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f64.nxv4i64(<vscale x 4 x double>)
+
+define <vscale x 2 x i32> @test_signed_v2f64_v2i32(<vscale x 2 x double> %f) {
+; CHECK-LABEL: test_signed_v2f64_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI7_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI7_0)(a0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI7_1)
+; CHECK-NEXT:    fld ft1, %lo(.LCPI7_1)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vfmax.vf v10, v8, ft0
+; CHECK-NEXT:    vfmin.vf v10, v10, ft1
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v10
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vmerge.vim v8, v12, 0, v0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f64.nxv2i32(<vscale x 2 x double> %f)
+    ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
+; CHECK-LABEL: test_signed_v4f64_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI8_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI8_0)(a0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI8_1)
+; CHECK-NEXT:    fld ft1, %lo(.LCPI8_1)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; CHECK-NEXT:    vfmax.vf v12, v8, ft0
+; CHECK-NEXT:    vfmin.vf v12, v12, ft1
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v12
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vmerge.vim v8, v16, 0, v0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f64.nxv4i32(<vscale x 4 x double> %f)
+    ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
+; CHECK-LABEL: test_signed_v8f64_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI9_0)(a0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI9_1)
+; CHECK-NEXT:    fld ft1, %lo(.LCPI9_1)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; CHECK-NEXT:    vfmax.vf v16, v8, ft0
+; CHECK-NEXT:    vfmin.vf v16, v16, ft1
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v24, v16
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vmerge.vim v8, v24, 0, v0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f64.nxv8i32(<vscale x 8 x double> %f)
+    ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
+; CHECK-LABEL: test_signed_v4f64_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI10_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI10_0)(a0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI10_1)
+; CHECK-NEXT:    fld ft1, %lo(.LCPI10_1)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; CHECK-NEXT:    vfmax.vf v12, v8, ft0
+; CHECK-NEXT:    vfmin.vf v12, v12, ft1
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v12
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v12, v16
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vmerge.vim v8, v12, 0, v0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f64.nxv4i16(<vscale x 4 x double> %f)
+    ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
+; CHECK-LABEL: test_signed_v8f64_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI11_0)(a0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI11_1)
+; CHECK-NEXT:    fld ft1, %lo(.LCPI11_1)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; CHECK-NEXT:    vfmax.vf v16, v8, ft0
+; CHECK-NEXT:    vfmin.vf v16, v16, ft1
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v24, v16
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v16, v24
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vmerge.vim v8, v16, 0, v0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f64.nxv8i16(<vscale x 8 x double> %f)
+    ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f64_v2i64(<vscale x 2 x double> %f) {
+; CHECK32-LABEL: test_signed_v2f64_v2i64:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    addi sp, sp, -16
+; CHECK32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK32-NEXT:    lui a0, 524288
+; CHECK32-NEXT:    sw a0, 12(sp)
+; CHECK32-NEXT:    sw zero, 8(sp)
+; CHECK32-NEXT:    li a1, -1
+; CHECK32-NEXT:    sw a1, 8(sp)
+; CHECK32-NEXT:    lui a1, %hi(.LCPI12_0)
+; CHECK32-NEXT:    fld ft0, %lo(.LCPI12_0)(a1)
+; CHECK32-NEXT:    addi a0, a0, -1
+; CHECK32-NEXT:    sw a0, 12(sp)
+; CHECK32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; CHECK32-NEXT:    vmfge.vf v10, v8, ft0
+; CHECK32-NEXT:    vmnot.m v0, v10
+; CHECK32-NEXT:    addi a0, sp, 8
+; CHECK32-NEXT:    vlse64.v v10, (a0), zero
+; CHECK32-NEXT:    lui a1, %hi(.LCPI12_1)
+; CHECK32-NEXT:    fld ft0, %lo(.LCPI12_1)(a1)
+; CHECK32-NEXT:    vfcvt.rtz.x.f.v v12, v8
+; CHECK32-NEXT:    vlse64.v v14, (a0), zero
+; CHECK32-NEXT:    vmerge.vvm v12, v12, v10, v0
+; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT:    vmfne.vv v10, v8, v8
+; CHECK32-NEXT:    vmerge.vvm v8, v12, v14, v0
+; CHECK32-NEXT:    vmv1r.v v0, v10
+; CHECK32-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK32-NEXT:    addi sp, sp, 16
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v2f64_v2i64:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI12_0)
+; CHECK64-NEXT:    fld ft0, %lo(.LCPI12_0)(a0)
+; CHECK64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; CHECK64-NEXT:    vmfge.vf v10, v8, ft0
+; CHECK64-NEXT:    vmnot.m v0, v10
+; CHECK64-NEXT:    vfcvt.rtz.x.f.v v10, v8
+; CHECK64-NEXT:    lui a0, %hi(.LCPI12_1)
+; CHECK64-NEXT:    fld ft0, %lo(.LCPI12_1)(a0)
+; CHECK64-NEXT:    li a0, -1
+; CHECK64-NEXT:    slli a1, a0, 63
+; CHECK64-NEXT:    vmerge.vxm v12, v10, a1, v0
+; CHECK64-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT:    srli a0, a0, 1
+; CHECK64-NEXT:    vmfne.vv v10, v8, v8
+; CHECK64-NEXT:    vmerge.vxm v8, v12, a0, v0
+; CHECK64-NEXT:    vmv1r.v v0, v10
+; CHECK64-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f64.nxv2i64(<vscale x 2 x double> %f)
+    ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f64_v4i64(<vscale x 4 x double> %f) {
+; CHECK32-LABEL: test_signed_v4f64_v4i64:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    addi sp, sp, -16
+; CHECK32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK32-NEXT:    lui a0, 524288
+; CHECK32-NEXT:    sw a0, 12(sp)
+; CHECK32-NEXT:    sw zero, 8(sp)
+; CHECK32-NEXT:    li a1, -1
+; CHECK32-NEXT:    sw a1, 8(sp)
+; CHECK32-NEXT:    lui a1, %hi(.LCPI13_0)
+; CHECK32-NEXT:    fld ft0, %lo(.LCPI13_0)(a1)
+; CHECK32-NEXT:    addi a0, a0, -1
+; CHECK32-NEXT:    sw a0, 12(sp)
+; CHECK32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; CHECK32-NEXT:    vmfge.vf v12, v8, ft0
+; CHECK32-NEXT:    vmnot.m v0, v12
+; CHECK32-NEXT:    addi a0, sp, 8
+; CHECK32-NEXT:    vlse64.v v12, (a0), zero
+; CHECK32-NEXT:    lui a1, %hi(.LCPI13_1)
+; CHECK32-NEXT:    fld ft0, %lo(.LCPI13_1)(a1)
+; CHECK32-NEXT:    vfcvt.rtz.x.f.v v16, v8
+; CHECK32-NEXT:    vlse64.v v20, (a0), zero
+; CHECK32-NEXT:    vmerge.vvm v16, v16, v12, v0
+; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT:    vmfne.vv v12, v8, v8
+; CHECK32-NEXT:    vmerge.vvm v8, v16, v20, v0
+; CHECK32-NEXT:    vmv1r.v v0, v12
+; CHECK32-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK32-NEXT:    addi sp, sp, 16
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v4f64_v4i64:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI13_0)
+; CHECK64-NEXT:    fld ft0, %lo(.LCPI13_0)(a0)
+; CHECK64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; CHECK64-NEXT:    vmfge.vf v12, v8, ft0
+; CHECK64-NEXT:    vmnot.m v0, v12
+; CHECK64-NEXT:    vfcvt.rtz.x.f.v v12, v8
+; CHECK64-NEXT:    lui a0, %hi(.LCPI13_1)
+; CHECK64-NEXT:    fld ft0, %lo(.LCPI13_1)(a0)
+; CHECK64-NEXT:    li a0, -1
+; CHECK64-NEXT:    slli a1, a0, 63
+; CHECK64-NEXT:    vmerge.vxm v16, v12, a1, v0
+; CHECK64-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT:    srli a0, a0, 1
+; CHECK64-NEXT:    vmfne.vv v12, v8, v8
+; CHECK64-NEXT:    vmerge.vxm v8, v16, a0, v0
+; CHECK64-NEXT:    vmv1r.v v0, v12
+; CHECK64-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f64.nxv4i64(<vscale x 4 x double> %f)
+    ret <vscale x 4 x i64> %x
+}
+
+
+; half
+
+declare <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f16.nxv2i32(<vscale x 2 x half>)
+declare <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f16.nxv4i32(<vscale x 4 x half>)
+declare <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f16.nxv8i32(<vscale x 8 x half>)
+declare <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f16.nxv4i16(<vscale x 4 x half>)
+declare <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f16.nxv8i16(<vscale x 8 x half>)
+declare <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f16.nxv2i64(<vscale x 2 x half>)
+declare <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f16.nxv4i64(<vscale x 4 x half>)
+
+define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
+; CHECK32-LABEL: test_signed_v2f16_v2i32:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    lui a0, %hi(.LCPI14_0)
+; CHECK32-NEXT:    flh ft0, %lo(.LCPI14_0)(a0)
+; CHECK32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK32-NEXT:    vmfge.vf v9, v8, ft0
+; CHECK32-NEXT:    vmnot.m v0, v9
+; CHECK32-NEXT:    vfwcvt.rtz.x.f.v v9, v8
+; CHECK32-NEXT:    lui a0, 524288
+; CHECK32-NEXT:    lui a1, %hi(.LCPI14_1)
+; CHECK32-NEXT:    flh ft0, %lo(.LCPI14_1)(a1)
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK32-NEXT:    vmerge.vxm v9, v9, a0, v0
+; CHECK32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT:    addi a0, a0, -1
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK32-NEXT:    vmerge.vxm v9, v9, a0, v0
+; CHECK32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK32-NEXT:    vmfne.vv v0, v8, v8
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK32-NEXT:    vmerge.vim v8, v9, 0, v0
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v2f16_v2i32:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI14_0)
+; CHECK64-NEXT:    flh ft0, %lo(.LCPI14_0)(a0)
+; CHECK64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK64-NEXT:    vmfge.vf v9, v8, ft0
+; CHECK64-NEXT:    vmnot.m v0, v9
+; CHECK64-NEXT:    vfwcvt.rtz.x.f.v v9, v8
+; CHECK64-NEXT:    lui a0, %hi(.LCPI14_1)
+; CHECK64-NEXT:    flh ft0, %lo(.LCPI14_1)(a0)
+; CHECK64-NEXT:    li a0, 1
+; CHECK64-NEXT:    slli a0, a0, 31
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK64-NEXT:    vmerge.vxm v9, v9, a0, v0
+; CHECK64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK64-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT:    lui a0, 524288
+; CHECK64-NEXT:    addiw a0, a0, -1
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK64-NEXT:    vmerge.vxm v9, v9, a0, v0
+; CHECK64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK64-NEXT:    vmfne.vv v0, v8, v8
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK64-NEXT:    vmerge.vim v8, v9, 0, v0
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f16.nxv2i32(<vscale x 2 x half> %f)
+    ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
+; CHECK32-LABEL: test_signed_v4f16_v4i32:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    lui a0, %hi(.LCPI15_0)
+; CHECK32-NEXT:    flh ft0, %lo(.LCPI15_0)(a0)
+; CHECK32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
+; CHECK32-NEXT:    vmfge.vf v9, v8, ft0
+; CHECK32-NEXT:    vmnot.m v0, v9
+; CHECK32-NEXT:    vfwcvt.rtz.x.f.v v10, v8
+; CHECK32-NEXT:    lui a0, 524288
+; CHECK32-NEXT:    lui a1, %hi(.LCPI15_1)
+; CHECK32-NEXT:    flh ft0, %lo(.LCPI15_1)(a1)
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK32-NEXT:    vmerge.vxm v10, v10, a0, v0
+; CHECK32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT:    addi a0, a0, -1
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK32-NEXT:    vmerge.vxm v10, v10, a0, v0
+; CHECK32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK32-NEXT:    vmfne.vv v0, v8, v8
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK32-NEXT:    vmerge.vim v8, v10, 0, v0
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v4f16_v4i32:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI15_0)
+; CHECK64-NEXT:    flh ft0, %lo(.LCPI15_0)(a0)
+; CHECK64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
+; CHECK64-NEXT:    vmfge.vf v9, v8, ft0
+; CHECK64-NEXT:    vmnot.m v0, v9
+; CHECK64-NEXT:    vfwcvt.rtz.x.f.v v10, v8
+; CHECK64-NEXT:    lui a0, %hi(.LCPI15_1)
+; CHECK64-NEXT:    flh ft0, %lo(.LCPI15_1)(a0)
+; CHECK64-NEXT:    li a0, 1
+; CHECK64-NEXT:    slli a0, a0, 31
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK64-NEXT:    vmerge.vxm v10, v10, a0, v0
+; CHECK64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK64-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT:    lui a0, 524288
+; CHECK64-NEXT:    addiw a0, a0, -1
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK64-NEXT:    vmerge.vxm v10, v10, a0, v0
+; CHECK64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK64-NEXT:    vmfne.vv v0, v8, v8
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK64-NEXT:    vmerge.vim v8, v10, 0, v0
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f16.nxv4i32(<vscale x 4 x half> %f)
+    ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
+; CHECK32-LABEL: test_signed_v8f16_v8i32:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    lui a0, %hi(.LCPI16_0)
+; CHECK32-NEXT:    flh ft0, %lo(.LCPI16_0)(a0)
+; CHECK32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
+; CHECK32-NEXT:    vmfge.vf v10, v8, ft0
+; CHECK32-NEXT:    vmnot.m v0, v10
+; CHECK32-NEXT:    vfwcvt.rtz.x.f.v v12, v8
+; CHECK32-NEXT:    lui a0, 524288
+; CHECK32-NEXT:    lui a1, %hi(.LCPI16_1)
+; CHECK32-NEXT:    flh ft0, %lo(.LCPI16_1)(a1)
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK32-NEXT:    vmerge.vxm v12, v12, a0, v0
+; CHECK32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT:    addi a0, a0, -1
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK32-NEXT:    vmerge.vxm v12, v12, a0, v0
+; CHECK32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK32-NEXT:    vmfne.vv v0, v8, v8
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK32-NEXT:    vmerge.vim v8, v12, 0, v0
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v8f16_v8i32:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI16_0)
+; CHECK64-NEXT:    flh ft0, %lo(.LCPI16_0)(a0)
+; CHECK64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
+; CHECK64-NEXT:    vmfge.vf v10, v8, ft0
+; CHECK64-NEXT:    vmnot.m v0, v10
+; CHECK64-NEXT:    vfwcvt.rtz.x.f.v v12, v8
+; CHECK64-NEXT:    lui a0, %hi(.LCPI16_1)
+; CHECK64-NEXT:    flh ft0, %lo(.LCPI16_1)(a0)
+; CHECK64-NEXT:    li a0, 1
+; CHECK64-NEXT:    slli a0, a0, 31
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK64-NEXT:    vmerge.vxm v12, v12, a0, v0
+; CHECK64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK64-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT:    lui a0, 524288
+; CHECK64-NEXT:    addiw a0, a0, -1
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK64-NEXT:    vmerge.vxm v12, v12, a0, v0
+; CHECK64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK64-NEXT:    vmfne.vv v0, v8, v8
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK64-NEXT:    vmerge.vim v8, v12, 0, v0
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f16.nxv8i32(<vscale x 8 x half> %f)
+    ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
+; CHECK32-LABEL: test_signed_v4f16_v4i16:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    lui a0, %hi(.LCPI17_0)
+; CHECK32-NEXT:    flh ft0, %lo(.LCPI17_0)(a0)
+; CHECK32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
+; CHECK32-NEXT:    vmfge.vf v9, v8, ft0
+; CHECK32-NEXT:    vmnot.m v0, v9
+; CHECK32-NEXT:    lui a0, %hi(.LCPI17_1)
+; CHECK32-NEXT:    flh ft0, %lo(.LCPI17_1)(a0)
+; CHECK32-NEXT:    vfcvt.rtz.x.f.v v9, v8
+; CHECK32-NEXT:    lui a0, 8
+; CHECK32-NEXT:    vmerge.vxm v9, v9, a0, v0
+; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT:    addi a0, a0, -1
+; CHECK32-NEXT:    vmfne.vv v8, v8, v8
+; CHECK32-NEXT:    vmerge.vxm v9, v9, a0, v0
+; CHECK32-NEXT:    vmv.v.v v0, v8
+; CHECK32-NEXT:    vmerge.vim v8, v9, 0, v0
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v4f16_v4i16:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI17_0)
+; CHECK64-NEXT:    flh ft0, %lo(.LCPI17_0)(a0)
+; CHECK64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
+; CHECK64-NEXT:    vmfge.vf v9, v8, ft0
+; CHECK64-NEXT:    vmnot.m v0, v9
+; CHECK64-NEXT:    lui a0, %hi(.LCPI17_1)
+; CHECK64-NEXT:    flh ft0, %lo(.LCPI17_1)(a0)
+; CHECK64-NEXT:    vfcvt.rtz.x.f.v v9, v8
+; CHECK64-NEXT:    lui a0, 8
+; CHECK64-NEXT:    vmerge.vxm v9, v9, a0, v0
+; CHECK64-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT:    addiw a0, a0, -1
+; CHECK64-NEXT:    vmfne.vv v8, v8, v8
+; CHECK64-NEXT:    vmerge.vxm v9, v9, a0, v0
+; CHECK64-NEXT:    vmv.v.v v0, v8
+; CHECK64-NEXT:    vmerge.vim v8, v9, 0, v0
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f16.nxv4i16(<vscale x 4 x half> %f)
+    ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
+; CHECK32-LABEL: test_signed_v8f16_v8i16:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    lui a0, %hi(.LCPI18_0)
+; CHECK32-NEXT:    flh ft0, %lo(.LCPI18_0)(a0)
+; CHECK32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
+; CHECK32-NEXT:    vmfge.vf v10, v8, ft0
+; CHECK32-NEXT:    vmnot.m v0, v10
+; CHECK32-NEXT:    lui a0, %hi(.LCPI18_1)
+; CHECK32-NEXT:    flh ft0, %lo(.LCPI18_1)(a0)
+; CHECK32-NEXT:    vfcvt.rtz.x.f.v v10, v8
+; CHECK32-NEXT:    lui a0, 8
+; CHECK32-NEXT:    vmerge.vxm v12, v10, a0, v0
+; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT:    addi a0, a0, -1
+; CHECK32-NEXT:    vmfne.vv v10, v8, v8
+; CHECK32-NEXT:    vmerge.vxm v8, v12, a0, v0
+; CHECK32-NEXT:    vmv1r.v v0, v10
+; CHECK32-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v8f16_v8i16:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI18_0)
+; CHECK64-NEXT:    flh ft0, %lo(.LCPI18_0)(a0)
+; CHECK64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
+; CHECK64-NEXT:    vmfge.vf v10, v8, ft0
+; CHECK64-NEXT:    vmnot.m v0, v10
+; CHECK64-NEXT:    lui a0, %hi(.LCPI18_1)
+; CHECK64-NEXT:    flh ft0, %lo(.LCPI18_1)(a0)
+; CHECK64-NEXT:    vfcvt.rtz.x.f.v v10, v8
+; CHECK64-NEXT:    lui a0, 8
+; CHECK64-NEXT:    vmerge.vxm v12, v10, a0, v0
+; CHECK64-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT:    addiw a0, a0, -1
+; CHECK64-NEXT:    vmfne.vv v10, v8, v8
+; CHECK64-NEXT:    vmerge.vxm v8, v12, a0, v0
+; CHECK64-NEXT:    vmv1r.v v0, v10
+; CHECK64-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f16.nxv8i16(<vscale x 8 x half> %f)
+    ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
+; CHECK32-LABEL: test_signed_v2f16_v2i64:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    addi sp, sp, -16
+; CHECK32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK32-NEXT:    lui a0, 524288
+; CHECK32-NEXT:    sw a0, 12(sp)
+; CHECK32-NEXT:    sw zero, 8(sp)
+; CHECK32-NEXT:    li a1, -1
+; CHECK32-NEXT:    sw a1, 8(sp)
+; CHECK32-NEXT:    lui a1, %hi(.LCPI19_0)
+; CHECK32-NEXT:    flh ft0, %lo(.LCPI19_0)(a1)
+; CHECK32-NEXT:    addi a0, a0, -1
+; CHECK32-NEXT:    sw a0, 12(sp)
+; CHECK32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK32-NEXT:    vmfge.vf v9, v8, ft0
+; CHECK32-NEXT:    vmnot.m v0, v9
+; CHECK32-NEXT:    addi a0, sp, 8
+; CHECK32-NEXT:    vlse64.v v10, (a0), zero
+; CHECK32-NEXT:    vfwcvt.f.f.v v9, v8
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK32-NEXT:    vfwcvt.rtz.x.f.v v12, v9
+; CHECK32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK32-NEXT:    vmerge.vvm v10, v12, v10, v0
+; CHECK32-NEXT:    lui a1, %hi(.LCPI19_1)
+; CHECK32-NEXT:    flh ft0, %lo(.LCPI19_1)(a1)
+; CHECK32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK32-NEXT:    vlse64.v v12, (a0), zero
+; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK32-NEXT:    vmerge.vvm v10, v10, v12, v0
+; CHECK32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK32-NEXT:    vmfne.vv v0, v8, v8
+; CHECK32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK32-NEXT:    vmerge.vim v8, v10, 0, v0
+; CHECK32-NEXT:    addi sp, sp, 16
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v2f16_v2i64:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI19_0)
+; CHECK64-NEXT:    flh ft0, %lo(.LCPI19_0)(a0)
+; CHECK64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK64-NEXT:    vmfge.vf v9, v8, ft0
+; CHECK64-NEXT:    vmnot.m v0, v9
+; CHECK64-NEXT:    vfwcvt.f.f.v v9, v8
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK64-NEXT:    vfwcvt.rtz.x.f.v v10, v9
+; CHECK64-NEXT:    li a0, -1
+; CHECK64-NEXT:    lui a1, %hi(.LCPI19_1)
+; CHECK64-NEXT:    flh ft0, %lo(.LCPI19_1)(a1)
+; CHECK64-NEXT:    slli a1, a0, 63
+; CHECK64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK64-NEXT:    vmerge.vxm v10, v10, a1, v0
+; CHECK64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK64-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT:    srli a0, a0, 1
+; CHECK64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK64-NEXT:    vmerge.vxm v10, v10, a0, v0
+; CHECK64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK64-NEXT:    vmfne.vv v0, v8, v8
+; CHECK64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK64-NEXT:    vmerge.vim v8, v10, 0, v0
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f16.nxv2i64(<vscale x 2 x half> %f)
+    ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
+; CHECK32-LABEL: test_signed_v4f16_v4i64:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    addi sp, sp, -16
+; CHECK32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK32-NEXT:    lui a0, 524288
+; CHECK32-NEXT:    sw a0, 12(sp)
+; CHECK32-NEXT:    sw zero, 8(sp)
+; CHECK32-NEXT:    li a1, -1
+; CHECK32-NEXT:    sw a1, 8(sp)
+; CHECK32-NEXT:    lui a1, %hi(.LCPI20_0)
+; CHECK32-NEXT:    flh ft0, %lo(.LCPI20_0)(a1)
+; CHECK32-NEXT:    addi a0, a0, -1
+; CHECK32-NEXT:    sw a0, 12(sp)
+; CHECK32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
+; CHECK32-NEXT:    vmfge.vf v9, v8, ft0
+; CHECK32-NEXT:    vmnot.m v0, v9
+; CHECK32-NEXT:    addi a0, sp, 8
+; CHECK32-NEXT:    vlse64.v v12, (a0), zero
+; CHECK32-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK32-NEXT:    vfwcvt.rtz.x.f.v v16, v10
+; CHECK32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK32-NEXT:    vmerge.vvm v12, v16, v12, v0
+; CHECK32-NEXT:    lui a1, %hi(.LCPI20_1)
+; CHECK32-NEXT:    flh ft0, %lo(.LCPI20_1)(a1)
+; CHECK32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK32-NEXT:    vlse64.v v16, (a0), zero
+; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK32-NEXT:    vmerge.vvm v12, v12, v16, v0
+; CHECK32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK32-NEXT:    vmfne.vv v0, v8, v8
+; CHECK32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK32-NEXT:    vmerge.vim v8, v12, 0, v0
+; CHECK32-NEXT:    addi sp, sp, 16
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v4f16_v4i64:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI20_0)
+; CHECK64-NEXT:    flh ft0, %lo(.LCPI20_0)(a0)
+; CHECK64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
+; CHECK64-NEXT:    vmfge.vf v9, v8, ft0
+; CHECK64-NEXT:    vmnot.m v0, v9
+; CHECK64-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK64-NEXT:    vfwcvt.rtz.x.f.v v12, v10
+; CHECK64-NEXT:    li a0, -1
+; CHECK64-NEXT:    lui a1, %hi(.LCPI20_1)
+; CHECK64-NEXT:    flh ft0, %lo(.LCPI20_1)(a1)
+; CHECK64-NEXT:    slli a1, a0, 63
+; CHECK64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK64-NEXT:    vmerge.vxm v12, v12, a1, v0
+; CHECK64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK64-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT:    srli a0, a0, 1
+; CHECK64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK64-NEXT:    vmerge.vxm v12, v12, a0, v0
+; CHECK64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK64-NEXT:    vmfne.vv v0, v8, v8
+; CHECK64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK64-NEXT:    vmerge.vim v8, v12, 0, v0
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f16.nxv4i64(<vscale x 4 x half> %f)
+    ret <vscale x 4 x i64> %x
+}
+

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll
new file mode 100644
index 0000000000000..6278e9429f395
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll
@@ -0,0 +1,524 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK32
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK64
+
+; Float
+
+declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f32.nxv2i32(<vscale x 2 x float>)
+declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f32.nxv4i32(<vscale x 4 x float>)
+declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f32.nxv8i32(<vscale x 8 x float>)
+declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f32.nxv4i16(<vscale x 4 x float>)
+declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f32.nxv8i16(<vscale x 8 x float>)
+declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f32.nxv2i64(<vscale x 2 x float>)
+declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f32.nxv4i64(<vscale x 4 x float>)
+
+define <vscale x 2 x i32> @test_signed_v2f32_v2i32(<vscale x 2 x float> %f) {
+; CHECK-LABEL: test_signed_v2f32_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI0_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vmfgt.vf v9, v8, ft0
+; CHECK-NEXT:    fmv.w.x ft0, zero
+; CHECK-NEXT:    vmfge.vf v10, v8, ft0
+; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK-NEXT:    vmv.v.v v0, v9
+; CHECK-NEXT:    vmerge.vim v8, v8, -1, v0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f32.nxv2i32(<vscale x 2 x float> %f)
+    ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_signed_v4f32_v4i32(<vscale x 4 x float> %f) {
+; CHECK-LABEL: test_signed_v4f32_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI1_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vmfgt.vf v10, v8, ft0
+; CHECK-NEXT:    fmv.w.x ft0, zero
+; CHECK-NEXT:    vmfge.vf v11, v8, ft0
+; CHECK-NEXT:    vmnot.m v0, v11
+; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmerge.vim v8, v8, -1, v0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f32.nxv4i32(<vscale x 4 x float> %f)
+    ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_signed_v8f32_v8i32(<vscale x 8 x float> %f) {
+; CHECK-LABEL: test_signed_v8f32_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI2_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vmfgt.vf v12, v8, ft0
+; CHECK-NEXT:    fmv.w.x ft0, zero
+; CHECK-NEXT:    vmfge.vf v13, v8, ft0
+; CHECK-NEXT:    vmnot.m v0, v13
+; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmerge.vim v8, v8, -1, v0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f32.nxv8i32(<vscale x 8 x float> %f)
+    ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f32_v4i16(<vscale x 4 x float> %f) {
+; CHECK-LABEL: test_signed_v4f32_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI3_0)(a0)
+; CHECK-NEXT:    fmv.w.x ft1, zero
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, ft1
+; CHECK-NEXT:    vfmin.vf v10, v8, ft0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v10
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f32.nxv4i16(<vscale x 4 x float> %f)
+    ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) {
+; CHECK-LABEL: test_signed_v8f32_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI4_0)(a0)
+; CHECK-NEXT:    fmv.w.x ft1, zero
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, ft1
+; CHECK-NEXT:    vfmin.vf v12, v8, ft0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v12
+; CHECK-NEXT:    ret
+    %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f32.nxv8i16(<vscale x 8 x float> %f)
+    ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f32_v2i64(<vscale x 2 x float> %f) {
+; CHECK-LABEL: test_signed_v2f32_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI5_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vmfgt.vf v9, v8, ft0
+; CHECK-NEXT:    fmv.w.x ft0, zero
+; CHECK-NEXT:    vmfge.vf v10, v8, ft0
+; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vmerge.vim v10, v10, 0, v0
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmerge.vim v8, v10, -1, v0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f32.nxv2i64(<vscale x 2 x float> %f)
+    ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f32_v4i64(<vscale x 4 x float> %f) {
+; CHECK-LABEL: test_signed_v4f32_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI6_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vmfgt.vf v10, v8, ft0
+; CHECK-NEXT:    fmv.w.x ft0, zero
+; CHECK-NEXT:    vmfge.vf v11, v8, ft0
+; CHECK-NEXT:    vmnot.m v0, v11
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v12, v8
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK-NEXT:    vmerge.vim v12, v12, 0, v0
+; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmerge.vim v8, v12, -1, v0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f32.nxv4i64(<vscale x 4 x float> %f)
+    ret <vscale x 4 x i64> %x
+}
+
+; Double
+
+declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f64.nxv2i32(<vscale x 2 x double>)
+declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f64.nxv4i32(<vscale x 4 x double>)
+declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f64.nxv8i32(<vscale x 8 x double>)
+declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f64.nxv4i16(<vscale x 4 x double>)
+declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f64.nxv8i16(<vscale x 8 x double>)
+declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f64.nxv2i64(<vscale x 2 x double>)
+declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f64.nxv4i64(<vscale x 4 x double>)
+
+define <vscale x 2 x i32> @test_signed_v2f64_v2i32(<vscale x 2 x double> %f) {
+; CHECK32-LABEL: test_signed_v2f64_v2i32:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    lui a0, %hi(.LCPI7_0)
+; CHECK32-NEXT:    fld ft0, %lo(.LCPI7_0)(a0)
+; CHECK32-NEXT:    fcvt.d.w ft1, zero
+; CHECK32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; CHECK32-NEXT:    vfmax.vf v8, v8, ft1
+; CHECK32-NEXT:    vfmin.vf v10, v8, ft0
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK32-NEXT:    vfncvt.rtz.xu.f.w v8, v10
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v2f64_v2i32:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI7_0)
+; CHECK64-NEXT:    fld ft0, %lo(.LCPI7_0)(a0)
+; CHECK64-NEXT:    fmv.d.x ft1, zero
+; CHECK64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; CHECK64-NEXT:    vfmax.vf v8, v8, ft1
+; CHECK64-NEXT:    vfmin.vf v10, v8, ft0
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK64-NEXT:    vfncvt.rtz.xu.f.w v8, v10
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f64.nxv2i32(<vscale x 2 x double> %f)
+    ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
+; CHECK32-LABEL: test_signed_v4f64_v4i32:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    lui a0, %hi(.LCPI8_0)
+; CHECK32-NEXT:    fld ft0, %lo(.LCPI8_0)(a0)
+; CHECK32-NEXT:    fcvt.d.w ft1, zero
+; CHECK32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; CHECK32-NEXT:    vfmax.vf v8, v8, ft1
+; CHECK32-NEXT:    vfmin.vf v12, v8, ft0
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK32-NEXT:    vfncvt.rtz.xu.f.w v8, v12
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v4f64_v4i32:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI8_0)
+; CHECK64-NEXT:    fld ft0, %lo(.LCPI8_0)(a0)
+; CHECK64-NEXT:    fmv.d.x ft1, zero
+; CHECK64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; CHECK64-NEXT:    vfmax.vf v8, v8, ft1
+; CHECK64-NEXT:    vfmin.vf v12, v8, ft0
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK64-NEXT:    vfncvt.rtz.xu.f.w v8, v12
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f64.nxv4i32(<vscale x 4 x double> %f)
+    ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
+; CHECK32-LABEL: test_signed_v8f64_v8i32:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    lui a0, %hi(.LCPI9_0)
+; CHECK32-NEXT:    fld ft0, %lo(.LCPI9_0)(a0)
+; CHECK32-NEXT:    fcvt.d.w ft1, zero
+; CHECK32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; CHECK32-NEXT:    vfmax.vf v8, v8, ft1
+; CHECK32-NEXT:    vfmin.vf v16, v8, ft0
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK32-NEXT:    vfncvt.rtz.xu.f.w v8, v16
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v8f64_v8i32:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI9_0)
+; CHECK64-NEXT:    fld ft0, %lo(.LCPI9_0)(a0)
+; CHECK64-NEXT:    fmv.d.x ft1, zero
+; CHECK64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; CHECK64-NEXT:    vfmax.vf v8, v8, ft1
+; CHECK64-NEXT:    vfmin.vf v16, v8, ft0
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK64-NEXT:    vfncvt.rtz.xu.f.w v8, v16
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f64.nxv8i32(<vscale x 8 x double> %f)
+    ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
+; CHECK32-LABEL: test_signed_v4f64_v4i16:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    lui a0, %hi(.LCPI10_0)
+; CHECK32-NEXT:    fld ft0, %lo(.LCPI10_0)(a0)
+; CHECK32-NEXT:    fcvt.d.w ft1, zero
+; CHECK32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; CHECK32-NEXT:    vfmax.vf v8, v8, ft1
+; CHECK32-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK32-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; CHECK32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK32-NEXT:    vncvt.x.x.w v8, v12
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v4f64_v4i16:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI10_0)
+; CHECK64-NEXT:    fld ft0, %lo(.LCPI10_0)(a0)
+; CHECK64-NEXT:    fmv.d.x ft1, zero
+; CHECK64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; CHECK64-NEXT:    vfmax.vf v8, v8, ft1
+; CHECK64-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK64-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; CHECK64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK64-NEXT:    vncvt.x.x.w v8, v12
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f64.nxv4i16(<vscale x 4 x double> %f)
+    ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
+; CHECK32-LABEL: test_signed_v8f64_v8i16:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    lui a0, %hi(.LCPI11_0)
+; CHECK32-NEXT:    fld ft0, %lo(.LCPI11_0)(a0)
+; CHECK32-NEXT:    fcvt.d.w ft1, zero
+; CHECK32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; CHECK32-NEXT:    vfmax.vf v8, v8, ft1
+; CHECK32-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK32-NEXT:    vfncvt.rtz.xu.f.w v16, v8
+; CHECK32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK32-NEXT:    vncvt.x.x.w v8, v16
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v8f64_v8i16:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI11_0)
+; CHECK64-NEXT:    fld ft0, %lo(.LCPI11_0)(a0)
+; CHECK64-NEXT:    fmv.d.x ft1, zero
+; CHECK64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; CHECK64-NEXT:    vfmax.vf v8, v8, ft1
+; CHECK64-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK64-NEXT:    vfncvt.rtz.xu.f.w v16, v8
+; CHECK64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK64-NEXT:    vncvt.x.x.w v8, v16
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f64.nxv8i16(<vscale x 8 x double> %f)
+    ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f64_v2i64(<vscale x 2 x double> %f) {
+; CHECK32-LABEL: test_signed_v2f64_v2i64:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    lui a0, %hi(.LCPI12_0)
+; CHECK32-NEXT:    fld ft0, %lo(.LCPI12_0)(a0)
+; CHECK32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; CHECK32-NEXT:    vmfgt.vf v10, v8, ft0
+; CHECK32-NEXT:    fcvt.d.w ft0, zero
+; CHECK32-NEXT:    vmfge.vf v11, v8, ft0
+; CHECK32-NEXT:    vmnot.m v0, v11
+; CHECK32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
+; CHECK32-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK32-NEXT:    vmv1r.v v0, v10
+; CHECK32-NEXT:    vmerge.vim v8, v8, -1, v0
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v2f64_v2i64:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI12_0)
+; CHECK64-NEXT:    fld ft0, %lo(.LCPI12_0)(a0)
+; CHECK64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; CHECK64-NEXT:    vmfgt.vf v10, v8, ft0
+; CHECK64-NEXT:    fmv.d.x ft0, zero
+; CHECK64-NEXT:    vmfge.vf v11, v8, ft0
+; CHECK64-NEXT:    vmnot.m v0, v11
+; CHECK64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
+; CHECK64-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK64-NEXT:    vmv1r.v v0, v10
+; CHECK64-NEXT:    vmerge.vim v8, v8, -1, v0
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f64.nxv2i64(<vscale x 2 x double> %f)
+    ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f64_v4i64(<vscale x 4 x double> %f) {
+; CHECK32-LABEL: test_signed_v4f64_v4i64:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    lui a0, %hi(.LCPI13_0)
+; CHECK32-NEXT:    fld ft0, %lo(.LCPI13_0)(a0)
+; CHECK32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; CHECK32-NEXT:    vmfgt.vf v12, v8, ft0
+; CHECK32-NEXT:    fcvt.d.w ft0, zero
+; CHECK32-NEXT:    vmfge.vf v13, v8, ft0
+; CHECK32-NEXT:    vmnot.m v0, v13
+; CHECK32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
+; CHECK32-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK32-NEXT:    vmv1r.v v0, v12
+; CHECK32-NEXT:    vmerge.vim v8, v8, -1, v0
+; CHECK32-NEXT:    ret
+;
+; CHECK64-LABEL: test_signed_v4f64_v4i64:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    lui a0, %hi(.LCPI13_0)
+; CHECK64-NEXT:    fld ft0, %lo(.LCPI13_0)(a0)
+; CHECK64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; CHECK64-NEXT:    vmfgt.vf v12, v8, ft0
+; CHECK64-NEXT:    fmv.d.x ft0, zero
+; CHECK64-NEXT:    vmfge.vf v13, v8, ft0
+; CHECK64-NEXT:    vmnot.m v0, v13
+; CHECK64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
+; CHECK64-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK64-NEXT:    vmv1r.v v0, v12
+; CHECK64-NEXT:    vmerge.vim v8, v8, -1, v0
+; CHECK64-NEXT:    ret
+    %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f64.nxv4i64(<vscale x 4 x double> %f)
+    ret <vscale x 4 x i64> %x
+}
+
+
+; half
+
+declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f16.nxv2i32(<vscale x 2 x half>)
+declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f16.nxv4i32(<vscale x 4 x half>)
+declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f16.nxv8i32(<vscale x 8 x half>)
+declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f16.nxv4i16(<vscale x 4 x half>)
+declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f16.nxv8i16(<vscale x 8 x half>)
+declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f16.nxv2i64(<vscale x 2 x half>)
+declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f16.nxv4i64(<vscale x 4 x half>)
+
+define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
+; CHECK-LABEL: test_signed_v2f16_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI14_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vmfgt.vf v9, v8, ft0
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    vmfge.vf v10, v8, ft0
+; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vmerge.vim v8, v10, 0, v0
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmerge.vim v8, v8, -1, v0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f16.nxv2i32(<vscale x 2 x half> %f)
+    ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
+; CHECK-LABEL: test_signed_v4f16_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI15_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI15_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vmfgt.vf v9, v8, ft0
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    vmfge.vf v10, v8, ft0
+; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vmerge.vim v10, v10, 0, v0
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmerge.vim v8, v10, -1, v0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f16.nxv4i32(<vscale x 4 x half> %f)
+    ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
+; CHECK-LABEL: test_signed_v8f16_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI16_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI16_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vmfgt.vf v10, v8, ft0
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    vmfge.vf v11, v8, ft0
+; CHECK-NEXT:    vmnot.m v0, v11
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v12, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vmerge.vim v12, v12, 0, v0
+; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmerge.vim v8, v12, -1, v0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f16.nxv8i32(<vscale x 8 x half> %f)
+    ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
+; CHECK-LABEL: test_signed_v4f16_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI17_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI17_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vmfgt.vf v9, v8, ft0
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    vmfge.vf v10, v8, ft0
+; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK-NEXT:    vmv.v.v v0, v9
+; CHECK-NEXT:    vmerge.vim v8, v8, -1, v0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f16.nxv4i16(<vscale x 4 x half> %f)
+    ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
+; CHECK-LABEL: test_signed_v8f16_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI18_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI18_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vmfgt.vf v10, v8, ft0
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    vmfge.vf v11, v8, ft0
+; CHECK-NEXT:    vmnot.m v0, v11
+; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmerge.vim v8, v8, -1, v0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f16.nxv8i16(<vscale x 8 x half> %f)
+    ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
+; CHECK-LABEL: test_signed_v2f16_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI19_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI19_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vmfgt.vf v9, v8, ft0
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    vmfge.vf v10, v8, ft0
+; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v12, v10
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vmerge.vim v10, v12, 0, v0
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmerge.vim v8, v10, -1, v0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f16.nxv2i64(<vscale x 2 x half> %f)
+    ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
+; CHECK-LABEL: test_signed_v4f16_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI20_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI20_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vmfgt.vf v9, v8, ft0
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    vmfge.vf v10, v8, ft0
+; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v12, v10
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK-NEXT:    vmerge.vim v12, v12, 0, v0
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmerge.vim v8, v12, -1, v0
+; CHECK-NEXT:    ret
+    %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f16.nxv4i64(<vscale x 4 x half> %f)
+    ret <vscale x 4 x i64> %x
+}
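
Most of the RVV sequences checked above follow the same per-lane saturation recipe: vmfge.vf plus vmnot.m builds a mask of negative and NaN lanes that vmerge.vim forces to 0, vmfgt.vf against a constant-pool bound builds a mask of overflowing lanes that a second vmerge.vim forces to all-ones, and the truncating convert handles everything in between. (The f64 to i16 narrowing case at the top instead clamps with vfmax.vf/vfmin.vf before converting, and note that the functions keep a test_signed_ prefix even though they call the unsigned intrinsic.) The following is a minimal scalar sketch of that per-lane logic, not taken from the patch; the function name and the f64 to i32 bound are chosen purely for illustration:

    define i32 @fptoui_sat_sketch(double %f) {
      ; Negative inputs and NaN saturate to 0 (fcmp ult is true for NaN).
      %too_small = fcmp ult double %f, 0.0
      ; Anything above the largest value that fits in an i32 saturates to
      ; UINT32_MAX; 4294967295.0 is exactly representable as a double.
      %too_big = fcmp ogt double %f, 4294967295.0
      ; The plain conversion is only selected when the input is in range,
      ; so its out-of-range poison value is never observed.
      %conv = fptoui double %f to i32
      %low  = select i1 %too_small, i32 0, i32 %conv
      %res  = select i1 %too_big, i32 -1, i32 %low
      ret i32 %res
    }

For an input like 1.0e10 this returns -1 (UINT32_MAX), for -3.5 or NaN it returns 0, and for 42.9 it returns 42, matching the llvm.fptoui.sat semantics that the vmerge.vim 0/-1 pairs above encode lane by lane.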
+
More information about the llvm-commits mailing list