[llvm] [GlobalISel][AArch64] Add G_FPTOSI_SAT/G_FPTOUI_SAT (PR #96297)

David Green via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 6 19:51:28 PDT 2024


https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/96297

>From afac44ab88ddc544c55e202246fac32c53387884 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Sat, 7 Sep 2024 03:51:13 +0100
Subject: [PATCH] [GlobalISel][AArch64] Add G_FPTOSI_SAT/G_FPTOUI_SAT

This is an implementation of the saturating fp to int conversions for
GlobalISel. On AArch64 the conversion instructions work this way, producing
saturating results.  LegalizerHelper::lowerFPTOINT_SAT is ported from SDAG.

AArch64 has a lot of existing tests for fptosi_sat, covering a wide range of
types. I have tried to make most of them work all at once, but a few fall back
due to other missing features such as f128 handling for min/max.
---
 llvm/docs/GlobalISel/GenericOpcode.rst        |    5 +
 .../CodeGen/GlobalISel/GenericMachineInstrs.h |    2 +
 .../llvm/CodeGen/GlobalISel/LegalizerHelper.h |    1 +
 .../CodeGen/GlobalISel/MachineIRBuilder.h     |   10 +
 llvm/include/llvm/Support/TargetOpcodes.def   |    6 +
 llvm/include/llvm/Target/GenericOpcodes.td    |   12 +
 .../Target/GlobalISel/SelectionDAGCompat.td   |    2 +
 .../include/llvm/Target/TargetSelectionDAG.td |    2 +
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  |    8 +
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |  150 +
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |   54 +-
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    |   49 +
 .../AArch64/GISel/AArch64RegisterBankInfo.cpp |    4 +
 .../GlobalISel/legalizer-info-validation.mir  |    7 +
 .../test/CodeGen/AArch64/fptosi-sat-scalar.ll | 1060 ++-
 .../test/CodeGen/AArch64/fptosi-sat-vector.ll | 7029 +++++++++++------
 .../test/CodeGen/AArch64/fptoui-sat-scalar.ll |  783 +-
 .../test/CodeGen/AArch64/fptoui-sat-vector.ll | 5524 ++++++++-----
 llvm/test/TableGen/GlobalISelEmitter.td       |    2 +-
 19 files changed, 9907 insertions(+), 4803 deletions(-)

diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst
index d32aeff5a69bb1..4c9a807c598c02 100644
--- a/llvm/docs/GlobalISel/GenericOpcode.rst
+++ b/llvm/docs/GlobalISel/GenericOpcode.rst
@@ -504,6 +504,11 @@ G_FPTOSI, G_FPTOUI, G_SITOFP, G_UITOFP
 
 Convert between integer and floating point.
 
+G_FPTOSI_SAT, G_FPTOUI_SAT
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Saturating convert from floating point to integer.
+
 G_FABS
 ^^^^^^
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index ef1171d9f1f64d..b7c545e5136f43 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -823,6 +823,8 @@ class GCastOp : public GenericMachineInstr {
     case TargetOpcode::G_FPEXT:
     case TargetOpcode::G_FPTOSI:
     case TargetOpcode::G_FPTOUI:
+    case TargetOpcode::G_FPTOSI_SAT:
+    case TargetOpcode::G_FPTOUI_SAT:
     case TargetOpcode::G_FPTRUNC:
     case TargetOpcode::G_INTTOPTR:
     case TargetOpcode::G_PTRTOINT:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index afd68250f5ca6e..5360850deeffd8 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -398,6 +398,7 @@ class LegalizerHelper {
   LegalizeResult lowerSITOFP(MachineInstr &MI);
   LegalizeResult lowerFPTOUI(MachineInstr &MI);
   LegalizeResult lowerFPTOSI(MachineInstr &MI);
+  LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI);
 
   LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI);
   LegalizeResult lowerFPTRUNC(MachineInstr &MI);
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 56a77b8596a18b..e02a967988a575 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -2000,6 +2000,16 @@ class MachineIRBuilder {
     return buildInstr(TargetOpcode::G_FPTOSI, {Dst}, {Src0});
   }
 
+  /// Build and insert \p Dst = G_FPTOUI_SAT \p Src0
+  MachineInstrBuilder buildFPTOUI_SAT(const DstOp &Dst, const SrcOp &Src0) {
+    return buildInstr(TargetOpcode::G_FPTOUI_SAT, {Dst}, {Src0});
+  }
+
+  /// Build and insert \p Dst = G_FPTOSI_SAT \p Src0
+  MachineInstrBuilder buildFPTOSI_SAT(const DstOp &Dst, const SrcOp &Src0) {
+    return buildInstr(TargetOpcode::G_FPTOSI_SAT, {Dst}, {Src0});
+  }
+
   /// Build and insert \p Dst = G_INTRINSIC_ROUNDEVEN \p Src0, \p Src1
   MachineInstrBuilder
   buildIntrinsicRoundeven(const DstOp &Dst, const SrcOp &Src0,
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index e1883de0c93b4c..cd3d7c2e11f922 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -682,6 +682,12 @@ HANDLE_TARGET_OPCODE(G_SITOFP)
 /// Generic unsigned-int to float conversion
 HANDLE_TARGET_OPCODE(G_UITOFP)
 
+/// Generic saturating float to signed-int conversion
+HANDLE_TARGET_OPCODE(G_FPTOSI_SAT)
+
+/// Generic saturating float to unsigned-int conversion
+HANDLE_TARGET_OPCODE(G_FPTOUI_SAT)
+
 /// Generic FP absolute value.
 HANDLE_TARGET_OPCODE(G_FABS)
 
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 36a0a087ba457c..f5570db373e4a7 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -769,6 +769,18 @@ def G_UITOFP : GenericInstruction {
   let hasSideEffects = false;
 }
 
+def G_FPTOSI_SAT : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type1:$src);
+  let hasSideEffects = false;
+}
+
+def G_FPTOUI_SAT : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type1:$src);
+  let hasSideEffects = false;
+}
+
 def G_FABS : GenericInstruction {
   let OutOperandList = (outs type0:$dst);
   let InOperandList = (ins type0:$src);
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 72d155b483cf2b..bbe8aa93bf32b6 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -98,6 +98,8 @@ def : GINodeEquiv<G_FPTOSI, fp_to_sint>;
 def : GINodeEquiv<G_FPTOUI, fp_to_uint>;
 def : GINodeEquiv<G_SITOFP, sint_to_fp>;
 def : GINodeEquiv<G_UITOFP, uint_to_fp>;
+def : GINodeEquiv<G_FPTOSI_SAT, fp_to_sint_sat_gi>;
+def : GINodeEquiv<G_FPTOUI_SAT, fp_to_uint_sat_gi>;
 def : GINodeEquiv<G_FADD, fadd>;
 def : GINodeEquiv<G_FSUB, fsub>;
 def : GINodeEquiv<G_FMA, fma>;
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index dd79002dcbdb48..abb1cc205358f7 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -569,6 +569,8 @@ def fp_to_sint : SDNode<"ISD::FP_TO_SINT" , SDTFPToIntOp>;
 def fp_to_uint : SDNode<"ISD::FP_TO_UINT" , SDTFPToIntOp>;
 def fp_to_sint_sat : SDNode<"ISD::FP_TO_SINT_SAT" , SDTFPToIntSatOp>;
 def fp_to_uint_sat : SDNode<"ISD::FP_TO_UINT_SAT" , SDTFPToIntSatOp>;
+def fp_to_sint_sat_gi : SDNode<"ISD::FP_TO_SINT_SAT" , SDTFPToIntOp>;
+def fp_to_uint_sat_gi : SDNode<"ISD::FP_TO_UINT_SAT" , SDTFPToIntOp>;
 def f16_to_fp  : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
 def fp_to_f16  : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;
 def bf16_to_fp  : SDNode<"ISD::BF16_TO_FP" , SDTIntToFPOp>;
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index b290d7fb4ce4a1..99267fab832b80 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2340,6 +2340,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                            MachineInstr::copyFlagsFromInstruction(CI));
     return true;
   }
+  case Intrinsic::fptosi_sat:
+    MIRBuilder.buildFPTOSI_SAT(getOrCreateVReg(CI),
+                               getOrCreateVReg(*CI.getArgOperand(0)));
+    return true;
+  case Intrinsic::fptoui_sat:
+    MIRBuilder.buildFPTOUI_SAT(getOrCreateVReg(CI),
+                               getOrCreateVReg(*CI.getArgOperand(0)));
+    return true;
   case Intrinsic::memcpy_inline:
     return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE);
   case Intrinsic::memcpy:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3640b77ff4068c..9ceae696f5ddec 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1880,6 +1880,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
   }
   case TargetOpcode::G_FPTOUI:
   case TargetOpcode::G_FPTOSI:
+  case TargetOpcode::G_FPTOUI_SAT:
+  case TargetOpcode::G_FPTOSI_SAT:
     return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
   case TargetOpcode::G_FPEXT:
     if (TypeIdx != 0)
@@ -2872,6 +2874,47 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
     else
       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
 
+    Observer.changedInstr(MI);
+    return Legalized;
+  case TargetOpcode::G_FPTOSI_SAT:
+  case TargetOpcode::G_FPTOUI_SAT:
+    Observer.changingInstr(MI);
+
+    if (TypeIdx == 0) {
+      Register OldDst = MI.getOperand(0).getReg();
+      LLT Ty = MRI.getType(OldDst);
+      Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
+      Register NewDst;
+      MI.getOperand(0).setReg(ExtReg);
+      uint64_t ShortBits = Ty.getScalarSizeInBits();
+      uint64_t WideBits = WideTy.getScalarSizeInBits();
+      MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+      if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
+        // z = i16 fptosi_sat(a)
+        // ->
+        // x = i32 fptosi_sat(a)
+        // y = smin(x, 32767)
+        // z = smax(y, -32768)
+        auto MaxVal = MIRBuilder.buildConstant(
+            WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
+        auto MinVal = MIRBuilder.buildConstant(
+            WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
+        Register MidReg =
+            MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
+        NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
+      } else {
+        // z = i16 fptoui_sat(a)
+        // ->
+        // x = i32 fptoui_sat(a)
+        // y = umin(x, 65535)
+        auto MaxVal = MIRBuilder.buildConstant(
+            WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
+        NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
+      }
+      MIRBuilder.buildTrunc(OldDst, NewDst);
+    } else
+      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+
     Observer.changedInstr(MI);
     return Legalized;
   case TargetOpcode::G_LOAD:
@@ -4170,6 +4213,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
     return lowerFPTOUI(MI);
   case G_FPTOSI:
     return lowerFPTOSI(MI);
+  case G_FPTOUI_SAT:
+  case G_FPTOSI_SAT:
+    return lowerFPTOINT_SAT(MI);
   case G_FPTRUNC:
     return lowerFPTRUNC(MI);
   case G_FPOWI:
@@ -4986,6 +5032,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case G_UITOFP:
   case G_FPTOSI:
   case G_FPTOUI:
+  case G_FPTOSI_SAT:
+  case G_FPTOUI_SAT:
   case G_INTTOPTR:
   case G_PTRTOINT:
   case G_ADDRSPACE_CAST:
@@ -5777,6 +5825,8 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case TargetOpcode::G_FPEXT:
   case TargetOpcode::G_FPTOSI:
   case TargetOpcode::G_FPTOUI:
+  case TargetOpcode::G_FPTOSI_SAT:
+  case TargetOpcode::G_FPTOUI_SAT:
   case TargetOpcode::G_SITOFP:
   case TargetOpcode::G_UITOFP: {
     Observer.changingInstr(MI);
@@ -7285,6 +7335,106 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
   return Legalized;
 }
 
+/// Lower G_FPTOSI_SAT/G_FPTOUI_SAT to a clamp or compare+select sequence.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
+  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+
+  bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
+  unsigned SatWidth = DstTy.getScalarSizeInBits();
+
+  // Integer saturation bounds and their floating-point counterparts.
+  APInt MinInt, MaxInt;
+  if (IsSigned) {
+    MinInt = APInt::getSignedMinValue(SatWidth);
+    MaxInt = APInt::getSignedMaxValue(SatWidth);
+  } else {
+    MinInt = APInt::getMinValue(SatWidth);
+    MaxInt = APInt::getMaxValue(SatWidth);
+  }
+
+  const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
+  APFloat MinFloat(Semantics);
+  APFloat MaxFloat(Semantics);
+
+  APFloat::opStatus MinStatus =
+      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
+  APFloat::opStatus MaxStatus =
+      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
+  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
+                             !(MaxStatus & APFloat::opStatus::opInexact);
+
+  // If the integer bounds are exactly representable as floats, emit a
+  // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
+  // and selects.
+  if (AreExactFloatBounds) {
+    // max(Src, MinFloat); OGT is false for NaN, so NaN maps to MinFloat.
+    auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
+    auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
+                                     SrcTy.changeElementSize(1), Src, MaxC);
+    auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
+    // min(Max, MaxFloat); Max is never NaN after the clamp above.
+    auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
+    auto MinP =
+        MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
+                             MinC, MachineInstr::FmNoNans);
+    auto Min =
+        MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
+    // Convert clamped value to integer. In the unsigned case we're done,
+    // because we mapped NaN to MinFloat, which will cast to zero.
+    if (!IsSigned) {
+      MIRBuilder.buildFPTOUI(Dst, Min);
+      MI.eraseFromParent();
+      return Legalized;
+    }
+
+    // Otherwise, select 0 if Src is NaN.
+    auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
+    auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
+                                       DstTy.changeElementSize(1), Src, Src);
+    MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
+                           FpToInt);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  // Result of direct conversion. The assumption here is that the operation is
+  // non-trapping and it's fine to apply it to an out-of-range value if we
+  // select it away later.
+  auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
+                          : MIRBuilder.buildFPTOUI(DstTy, Src);
+
+  // If Src ULT MinFloat, select MinInt. In particular, this also selects
+  // MinInt if Src is NaN.
+  auto ULT =
+      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
+                           MIRBuilder.buildFConstant(SrcTy, MinFloat));
+  auto Max = MIRBuilder.buildSelect(
+      DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
+  // If Src OGT MaxFloat, select MaxInt.
+  auto OGT =
+      MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
+                           MIRBuilder.buildFConstant(SrcTy, MaxFloat));
+
+  // In the unsigned case we are done, because we mapped NaN to MinInt, which
+  // is already zero.
+  if (!IsSigned) {
+    MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
+                           Max, MachineInstr::FmNoNans);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  // Otherwise, select 0 if Src is NaN.
+  auto Min = MIRBuilder.buildSelect(
+      DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
+  auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
+                                     DstTy.changeElementSize(1), Src, Src);
+  MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
+  MI.eraseFromParent();
+  return Legalized;
+}
+
 // f64 -> f16 conversion using round-to-nearest-even rounding mode.
 LegalizerHelper::LegalizeResult
 LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index ccef85bfaa8afc..d23af22f983614 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4724,7 +4724,7 @@ defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
 defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
 
 // AArch64's FCVT instructions saturate when out of range.
-multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
+multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
   let Predicates = [HasFullFP16] in {
   def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
             (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
@@ -4740,6 +4740,21 @@ multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
   def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
             (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
 
+  let Predicates = [HasFullFP16] in {
+  def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
+            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
+            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+  }
+  def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
+            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
+            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+  def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
+            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
+            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
   let Predicates = [HasFullFP16] in {
   def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
             (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
@@ -4754,10 +4769,25 @@ multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
             (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
   def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
             (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+
+  let Predicates = [HasFullFP16] in {
+  def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
+            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
+            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+  }
+  def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
+            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
+            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+  def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
+            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
+            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
 }
 
-defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
-defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;
+defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
+defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
 
 multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
   let Predicates = [HasFullFP16] in {
@@ -5303,12 +5333,17 @@ defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
 defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
 
 // AArch64's FCVT instructions saturate when out of range.
-multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> {
+multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
   let Predicates = [HasFullFP16] in {
   def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)),
             (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
   def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)),
             (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
+
+  def : Pat<(v4i16 (to_int_sat_gi v4f16:$Rn)),
+            (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
+  def : Pat<(v8i16 (to_int_sat_gi v8f16:$Rn)),
+            (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
   }
   def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)),
             (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
@@ -5316,9 +5351,16 @@ multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> {
             (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
   def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)),
             (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
+
+  def : Pat<(v2i32 (to_int_sat_gi v2f32:$Rn)),
+            (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
+  def : Pat<(v4i32 (to_int_sat_gi v4f32:$Rn)),
+            (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
+  def : Pat<(v2i64 (to_int_sat_gi v2f64:$Rn)),
+            (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
 }
-defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">;
-defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">;
+defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
+defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
 
 def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
 def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index db5cd1d32d73d0..b0c1215bd5514a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -728,6 +728,55 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .libcallFor(
           {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
 
+  getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
+      .legalFor({{s32, s32},
+                 {s64, s32},
+                 {s32, s64},
+                 {s64, s64},
+                 {v2s64, v2s64},
+                 {v4s32, v4s32},
+                 {v2s32, v2s32}})
+      .legalIf([=](const LegalityQuery &Query) {
+        return HasFP16 &&
+               (Query.Types[1] == s16 || Query.Types[1] == v4s16 ||
+                Query.Types[1] == v8s16) &&
+               (Query.Types[0] == s32 || Query.Types[0] == s64 ||
+                Query.Types[0] == v4s16 || Query.Types[0] == v8s16);
+      })
+      // Handle types larger than i64 by scalarizing/lowering.
+      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
+      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
+      // The range of a fp16 value fits into an i17, so we can lower the width
+      // to i64.
+      .narrowScalarIf(
+          [=](const LegalityQuery &Query) {
+            return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
+          },
+          changeTo(0, s64))
+      .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
+      .moreElementsToNextPow2(0)
+      .widenScalarToNextPow2(0, /*MinSize=*/32)
+      .minScalar(0, s32)
+      .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
+      .widenScalarIf(
+          [=](const LegalityQuery &Query) {
+            unsigned ITySize = Query.Types[0].getScalarSizeInBits();
+            return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
+                   ITySize > Query.Types[1].getScalarSizeInBits();
+          },
+          LegalizeMutations::changeElementSizeTo(1, 0))
+      .widenScalarIf(
+          [=](const LegalityQuery &Query) {
+            unsigned FTySize = Query.Types[1].getScalarSizeInBits();
+            return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
+                   Query.Types[0].getScalarSizeInBits() < FTySize;
+          },
+          LegalizeMutations::changeElementSizeTo(0, 1))
+      .widenScalarOrEltToNextPow2(0)
+      .clampNumElements(0, v4s16, v8s16)
+      .clampNumElements(0, v2s32, v4s32)
+      .clampMaxNumElements(0, s64, 2);
+
   getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
       .legalFor({{s32, s32},
                  {s64, s32},
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 23e135063147a1..8d63c36eb015f3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -535,6 +535,8 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
   switch (MI.getOpcode()) {
   case TargetOpcode::G_FPTOSI:
   case TargetOpcode::G_FPTOUI:
+  case TargetOpcode::G_FPTOSI_SAT:
+  case TargetOpcode::G_FPTOUI_SAT:
   case TargetOpcode::G_FCMP:
   case TargetOpcode::G_LROUND:
   case TargetOpcode::G_LLROUND:
@@ -799,6 +801,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   }
   case TargetOpcode::G_FPTOSI:
   case TargetOpcode::G_FPTOUI:
+  case TargetOpcode::G_FPTOSI_SAT:
+  case TargetOpcode::G_FPTOUI_SAT:
   case TargetOpcode::G_INTRINSIC_LRINT:
   case TargetOpcode::G_INTRINSIC_LLRINT:
     if (MRI.getType(MI.getOperand(0).getReg()).isVector())
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index b3b85090d11251..ddae45d96f6a78 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -538,6 +538,13 @@
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_FPTOSI_SAT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_FPTOUI_SAT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_FABS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index eeb1504d8dc77b..9c52b024d3e259 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
+; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
 
 ;
 ; 32-bit float to signed integer
@@ -18,13 +20,23 @@ declare i100 @llvm.fptosi.sat.i100.f32(float)
 declare i128 @llvm.fptosi.sat.i128.f32(float)
 
 define i1 @test_signed_i1_f32(float %f) nounwind {
-; CHECK-LABEL: test_signed_i1_f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs w8, s0
-; CHECK-NEXT:    ands w8, w8, w8, asr #31
-; CHECK-NEXT:    csinv w8, w8, wzr, ge
-; CHECK-NEXT:    and w0, w8, #0x1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i1_f32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs w8, s0
+; CHECK-SD-NEXT:    ands w8, w8, w8, asr #31
+; CHECK-SD-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-SD-NEXT:    and w0, w8, #0x1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_i1_f32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs w8, s0
+; CHECK-GI-NEXT:    cmp w8, #0
+; CHECK-GI-NEXT:    csel w8, w8, wzr, lt
+; CHECK-GI-NEXT:    cmp w8, #0
+; CHECK-GI-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-GI-NEXT:    and w0, w8, #0x1
+; CHECK-GI-NEXT:    ret
     %x = call i1 @llvm.fptosi.sat.i1.f32(float %f)
     ret i1 %x
 }
@@ -99,16 +111,27 @@ define i32 @test_signed_i32_f32(float %f) nounwind {
 }
 
 define i50 @test_signed_i50_f32(float %f) nounwind {
-; CHECK-LABEL: test_signed_i50_f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs x8, s0
-; CHECK-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
-; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    csel x8, x8, x9, lt
-; CHECK-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
-; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    csel x0, x8, x9, gt
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i50_f32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs x8, s0
+; CHECK-SD-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-SD-NEXT:    cmp x8, x9
+; CHECK-SD-NEXT:    csel x8, x8, x9, lt
+; CHECK-SD-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
+; CHECK-SD-NEXT:    cmp x8, x9
+; CHECK-SD-NEXT:    csel x0, x8, x9, gt
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_i50_f32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs x8, s0
+; CHECK-GI-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-GI-NEXT:    mov x10, #-562949953421312 // =0xfffe000000000000
+; CHECK-GI-NEXT:    cmp x8, x9
+; CHECK-GI-NEXT:    csel x8, x8, x9, lt
+; CHECK-GI-NEXT:    cmp x8, x10
+; CHECK-GI-NEXT:    csel x0, x8, x10, gt
+; CHECK-GI-NEXT:    ret
     %x = call i50 @llvm.fptosi.sat.i50.f32(float %f)
     ret i50 %x
 }
@@ -123,57 +146,105 @@ define i64 @test_signed_i64_f32(float %f) nounwind {
 }
 
 define i100 @test_signed_i100_f32(float %f) nounwind {
-; CHECK-LABEL: test_signed_i100_f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov s8, s0
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    movi v0.2s, #241, lsl #24
-; CHECK-NEXT:    mov w8, #1895825407 // =0x70ffffff
-; CHECK-NEXT:    mov x10, #34359738367 // =0x7ffffffff
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov x8, #-34359738368 // =0xfffffff800000000
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    csel x8, x8, x1, lt
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    csel x8, x10, x8, gt
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    csel x0, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i100_f32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s8, s0
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    movi v0.2s, #241, lsl #24
+; CHECK-SD-NEXT:    mov w8, #1895825407 // =0x70ffffff
+; CHECK-SD-NEXT:    mov x10, #34359738367 // =0x7ffffffff
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov x8, #-34359738368 // =0xfffffff800000000
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x8, x8, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    csel x8, x10, x8, gt
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    csel x0, xzr, x9, vs
+; CHECK-SD-NEXT:    csel x1, xzr, x8, vs
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_i100_f32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s8, s0
+; CHECK-GI-NEXT:    bl __fixsfti
+; CHECK-GI-NEXT:    movi v0.2s, #241, lsl #24
+; CHECK-GI-NEXT:    mov w8, #1895825407 // =0x70ffffff
+; CHECK-GI-NEXT:    mov x10, #34359738367 // =0x7ffffffff
+; CHECK-GI-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fcmp s8, s0
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov x8, #34359738368 // =0x800000000
+; CHECK-GI-NEXT:    csel x9, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x8, x8, x1, lt
+; CHECK-GI-NEXT:    fcmp s8, s0
+; CHECK-GI-NEXT:    csinv x9, x9, xzr, le
+; CHECK-GI-NEXT:    csel x8, x10, x8, gt
+; CHECK-GI-NEXT:    fcmp s8, s8
+; CHECK-GI-NEXT:    csel x0, xzr, x9, vs
+; CHECK-GI-NEXT:    csel x1, xzr, x8, vs
+; CHECK-GI-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
     %x = call i100 @llvm.fptosi.sat.i100.f32(float %f)
     ret i100 %x
 }
 
 define i128 @test_signed_i128_f32(float %f) nounwind {
-; CHECK-LABEL: test_signed_i128_f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov s8, s0
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    movi v0.2s, #255, lsl #24
-; CHECK-NEXT:    mov w8, #2130706431 // =0x7effffff
-; CHECK-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    csel x8, x8, x1, lt
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    csel x8, x10, x8, gt
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    csel x0, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i128_f32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s8, s0
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    movi v0.2s, #255, lsl #24
+; CHECK-SD-NEXT:    mov w8, #2130706431 // =0x7effffff
+; CHECK-SD-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x8, x8, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    csel x8, x10, x8, gt
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    csel x0, xzr, x9, vs
+; CHECK-SD-NEXT:    csel x1, xzr, x8, vs
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_i128_f32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s8, s0
+; CHECK-GI-NEXT:    bl __fixsfti
+; CHECK-GI-NEXT:    movi v0.2s, #255, lsl #24
+; CHECK-GI-NEXT:    mov w8, #2130706431 // =0x7effffff
+; CHECK-GI-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-GI-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fcmp s8, s0
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-GI-NEXT:    csel x9, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x8, x8, x1, lt
+; CHECK-GI-NEXT:    fcmp s8, s0
+; CHECK-GI-NEXT:    csinv x9, x9, xzr, le
+; CHECK-GI-NEXT:    csel x8, x10, x8, gt
+; CHECK-GI-NEXT:    fcmp s8, s8
+; CHECK-GI-NEXT:    csel x0, xzr, x9, vs
+; CHECK-GI-NEXT:    csel x1, xzr, x8, vs
+; CHECK-GI-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
     %x = call i128 @llvm.fptosi.sat.i128.f32(float %f)
     ret i128 %x
 }
@@ -194,13 +265,23 @@ declare i100 @llvm.fptosi.sat.i100.f64(double)
 declare i128 @llvm.fptosi.sat.i128.f64(double)
 
 define i1 @test_signed_i1_f64(double %f) nounwind {
-; CHECK-LABEL: test_signed_i1_f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs w8, d0
-; CHECK-NEXT:    ands w8, w8, w8, asr #31
-; CHECK-NEXT:    csinv w8, w8, wzr, ge
-; CHECK-NEXT:    and w0, w8, #0x1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i1_f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs w8, d0
+; CHECK-SD-NEXT:    ands w8, w8, w8, asr #31
+; CHECK-SD-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-SD-NEXT:    and w0, w8, #0x1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_i1_f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs w8, d0
+; CHECK-GI-NEXT:    cmp w8, #0
+; CHECK-GI-NEXT:    csel w8, w8, wzr, lt
+; CHECK-GI-NEXT:    cmp w8, #0
+; CHECK-GI-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-GI-NEXT:    and w0, w8, #0x1
+; CHECK-GI-NEXT:    ret
     %x = call i1 @llvm.fptosi.sat.i1.f64(double %f)
     ret i1 %x
 }
@@ -275,16 +356,27 @@ define i32 @test_signed_i32_f64(double %f) nounwind {
 }
 
 define i50 @test_signed_i50_f64(double %f) nounwind {
-; CHECK-LABEL: test_signed_i50_f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs x8, d0
-; CHECK-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
-; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    csel x8, x8, x9, lt
-; CHECK-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
-; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    csel x0, x8, x9, gt
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i50_f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs x8, d0
+; CHECK-SD-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-SD-NEXT:    cmp x8, x9
+; CHECK-SD-NEXT:    csel x8, x8, x9, lt
+; CHECK-SD-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
+; CHECK-SD-NEXT:    cmp x8, x9
+; CHECK-SD-NEXT:    csel x0, x8, x9, gt
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_i50_f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs x8, d0
+; CHECK-GI-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-GI-NEXT:    mov x10, #-562949953421312 // =0xfffe000000000000
+; CHECK-GI-NEXT:    cmp x8, x9
+; CHECK-GI-NEXT:    csel x8, x8, x9, lt
+; CHECK-GI-NEXT:    cmp x8, x10
+; CHECK-GI-NEXT:    csel x0, x8, x10, gt
+; CHECK-GI-NEXT:    ret
     %x = call i50 @llvm.fptosi.sat.i50.f64(double %f)
     ret i50 %x
 }
@@ -299,59 +391,109 @@ define i64 @test_signed_i64_f64(double %f) nounwind {
 }
 
 define i100 @test_signed_i100_f64(double %f) nounwind {
-; CHECK-LABEL: test_signed_i100_f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov d8, d0
-; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    mov x8, #-4170333254945079296 // =0xc620000000000000
-; CHECK-NEXT:    mov x10, #34359738367 // =0x7ffffffff
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov x8, #5053038781909696511 // =0x461fffffffffffff
-; CHECK-NEXT:    fcmp d8, d0
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov x8, #-34359738368 // =0xfffffff800000000
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    csel x8, x8, x1, lt
-; CHECK-NEXT:    fcmp d8, d0
-; CHECK-NEXT:    csel x8, x10, x8, gt
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    fcmp d8, d8
-; CHECK-NEXT:    csel x0, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i100_f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov d8, d0
+; CHECK-SD-NEXT:    bl __fixdfti
+; CHECK-SD-NEXT:    mov x8, #-4170333254945079296 // =0xc620000000000000
+; CHECK-SD-NEXT:    mov x10, #34359738367 // =0x7ffffffff
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    mov x8, #5053038781909696511 // =0x461fffffffffffff
+; CHECK-SD-NEXT:    fcmp d8, d0
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    mov x8, #-34359738368 // =0xfffffff800000000
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x8, x8, x1, lt
+; CHECK-SD-NEXT:    fcmp d8, d0
+; CHECK-SD-NEXT:    csel x8, x10, x8, gt
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    fcmp d8, d8
+; CHECK-SD-NEXT:    csel x0, xzr, x9, vs
+; CHECK-SD-NEXT:    csel x1, xzr, x8, vs
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_i100_f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    fmov d8, d0
+; CHECK-GI-NEXT:    bl __fixdfti
+; CHECK-GI-NEXT:    mov x8, #-4170333254945079296 // =0xc620000000000000
+; CHECK-GI-NEXT:    mov x10, #34359738367 // =0x7ffffffff
+; CHECK-GI-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    mov x8, #5053038781909696511 // =0x461fffffffffffff
+; CHECK-GI-NEXT:    fcmp d8, d0
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    mov x8, #34359738368 // =0x800000000
+; CHECK-GI-NEXT:    csel x9, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x8, x8, x1, lt
+; CHECK-GI-NEXT:    fcmp d8, d0
+; CHECK-GI-NEXT:    csinv x9, x9, xzr, le
+; CHECK-GI-NEXT:    csel x8, x10, x8, gt
+; CHECK-GI-NEXT:    fcmp d8, d8
+; CHECK-GI-NEXT:    csel x0, xzr, x9, vs
+; CHECK-GI-NEXT:    csel x1, xzr, x8, vs
+; CHECK-GI-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
     %x = call i100 @llvm.fptosi.sat.i100.f64(double %f)
     ret i100 %x
 }
 
 define i128 @test_signed_i128_f64(double %f) nounwind {
-; CHECK-LABEL: test_signed_i128_f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov d8, d0
-; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    mov x8, #-4044232465378705408 // =0xc7e0000000000000
-; CHECK-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov x8, #5179139571476070399 // =0x47dfffffffffffff
-; CHECK-NEXT:    fcmp d8, d0
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    csel x8, x8, x1, lt
-; CHECK-NEXT:    fcmp d8, d0
-; CHECK-NEXT:    csel x8, x10, x8, gt
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    fcmp d8, d8
-; CHECK-NEXT:    csel x0, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i128_f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov d8, d0
+; CHECK-SD-NEXT:    bl __fixdfti
+; CHECK-SD-NEXT:    mov x8, #-4044232465378705408 // =0xc7e0000000000000
+; CHECK-SD-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    mov x8, #5179139571476070399 // =0x47dfffffffffffff
+; CHECK-SD-NEXT:    fcmp d8, d0
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x8, x8, x1, lt
+; CHECK-SD-NEXT:    fcmp d8, d0
+; CHECK-SD-NEXT:    csel x8, x10, x8, gt
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    fcmp d8, d8
+; CHECK-SD-NEXT:    csel x0, xzr, x9, vs
+; CHECK-SD-NEXT:    csel x1, xzr, x8, vs
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_i128_f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    fmov d8, d0
+; CHECK-GI-NEXT:    bl __fixdfti
+; CHECK-GI-NEXT:    mov x8, #-4044232465378705408 // =0xc7e0000000000000
+; CHECK-GI-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-GI-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    mov x8, #5179139571476070399 // =0x47dfffffffffffff
+; CHECK-GI-NEXT:    fcmp d8, d0
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-GI-NEXT:    csel x9, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x8, x8, x1, lt
+; CHECK-GI-NEXT:    fcmp d8, d0
+; CHECK-GI-NEXT:    csinv x9, x9, xzr, le
+; CHECK-GI-NEXT:    csel x8, x10, x8, gt
+; CHECK-GI-NEXT:    fcmp d8, d8
+; CHECK-GI-NEXT:    csel x0, xzr, x9, vs
+; CHECK-GI-NEXT:    csel x1, xzr, x8, vs
+; CHECK-GI-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
     %x = call i128 @llvm.fptosi.sat.i128.f64(double %f)
     ret i128 %x
 }
@@ -372,245 +514,515 @@ declare i100 @llvm.fptosi.sat.i100.f16(half)
 declare i128 @llvm.fptosi.sat.i128.f16(half)
 
 define i1 @test_signed_i1_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i1_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzs w8, s0
-; CHECK-CVT-NEXT:    ands w8, w8, w8, asr #31
-; CHECK-CVT-NEXT:    csinv w8, w8, wzr, ge
-; CHECK-CVT-NEXT:    and w0, w8, #0x1
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_i1_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs w8, h0
-; CHECK-FP16-NEXT:    ands w8, w8, w8, asr #31
-; CHECK-FP16-NEXT:    csinv w8, w8, wzr, ge
-; CHECK-FP16-NEXT:    and w0, w8, #0x1
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_i1_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvtzs w8, s0
+; CHECK-SD-CVT-NEXT:    ands w8, w8, w8, asr #31
+; CHECK-SD-CVT-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-SD-CVT-NEXT:    and w0, w8, #0x1
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i1_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs w8, h0
+; CHECK-SD-FP16-NEXT:    ands w8, w8, w8, asr #31
+; CHECK-SD-FP16-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-SD-FP16-NEXT:    and w0, w8, #0x1
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i1_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzs w8, s0
+; CHECK-GI-CVT-NEXT:    cmp w8, #0
+; CHECK-GI-CVT-NEXT:    csel w8, w8, wzr, lt
+; CHECK-GI-CVT-NEXT:    cmp w8, #0
+; CHECK-GI-CVT-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-GI-CVT-NEXT:    and w0, w8, #0x1
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i1_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs w8, h0
+; CHECK-GI-FP16-NEXT:    cmp w8, #0
+; CHECK-GI-FP16-NEXT:    csel w8, w8, wzr, lt
+; CHECK-GI-FP16-NEXT:    cmp w8, #0
+; CHECK-GI-FP16-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-GI-FP16-NEXT:    and w0, w8, #0x1
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i1 @llvm.fptosi.sat.i1.f16(half %f)
     ret i1 %x
 }
 
 define i8 @test_signed_i8_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i8_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov w8, #127 // =0x7f
-; CHECK-CVT-NEXT:    fcvtzs w9, s0
-; CHECK-CVT-NEXT:    cmp w9, #127
-; CHECK-CVT-NEXT:    csel w8, w9, w8, lt
-; CHECK-CVT-NEXT:    mov w9, #-128 // =0xffffff80
-; CHECK-CVT-NEXT:    cmn w8, #128
-; CHECK-CVT-NEXT:    csel w0, w8, w9, gt
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_i8_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs w9, h0
-; CHECK-FP16-NEXT:    mov w8, #127 // =0x7f
-; CHECK-FP16-NEXT:    cmp w9, #127
-; CHECK-FP16-NEXT:    csel w8, w9, w8, lt
-; CHECK-FP16-NEXT:    mov w9, #-128 // =0xffffff80
-; CHECK-FP16-NEXT:    cmn w8, #128
-; CHECK-FP16-NEXT:    csel w0, w8, w9, gt
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_i8_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov w8, #127 // =0x7f
+; CHECK-SD-CVT-NEXT:    fcvtzs w9, s0
+; CHECK-SD-CVT-NEXT:    cmp w9, #127
+; CHECK-SD-CVT-NEXT:    csel w8, w9, w8, lt
+; CHECK-SD-CVT-NEXT:    mov w9, #-128 // =0xffffff80
+; CHECK-SD-CVT-NEXT:    cmn w8, #128
+; CHECK-SD-CVT-NEXT:    csel w0, w8, w9, gt
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i8_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs w9, h0
+; CHECK-SD-FP16-NEXT:    mov w8, #127 // =0x7f
+; CHECK-SD-FP16-NEXT:    cmp w9, #127
+; CHECK-SD-FP16-NEXT:    csel w8, w9, w8, lt
+; CHECK-SD-FP16-NEXT:    mov w9, #-128 // =0xffffff80
+; CHECK-SD-FP16-NEXT:    cmn w8, #128
+; CHECK-SD-FP16-NEXT:    csel w0, w8, w9, gt
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i8_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov w8, #127 // =0x7f
+; CHECK-GI-CVT-NEXT:    fcvtzs w9, s0
+; CHECK-GI-CVT-NEXT:    cmp w9, #127
+; CHECK-GI-CVT-NEXT:    csel w8, w9, w8, lt
+; CHECK-GI-CVT-NEXT:    mov w9, #-128 // =0xffffff80
+; CHECK-GI-CVT-NEXT:    cmn w8, #128
+; CHECK-GI-CVT-NEXT:    csel w0, w8, w9, gt
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i8_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs w9, h0
+; CHECK-GI-FP16-NEXT:    mov w8, #127 // =0x7f
+; CHECK-GI-FP16-NEXT:    cmp w9, #127
+; CHECK-GI-FP16-NEXT:    csel w8, w9, w8, lt
+; CHECK-GI-FP16-NEXT:    mov w9, #-128 // =0xffffff80
+; CHECK-GI-FP16-NEXT:    cmn w8, #128
+; CHECK-GI-FP16-NEXT:    csel w0, w8, w9, gt
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i8 @llvm.fptosi.sat.i8.f16(half %f)
     ret i8 %x
 }
 
 define i13 @test_signed_i13_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i13_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov w8, #4095 // =0xfff
-; CHECK-CVT-NEXT:    fcvtzs w9, s0
-; CHECK-CVT-NEXT:    cmp w9, #4095
-; CHECK-CVT-NEXT:    csel w8, w9, w8, lt
-; CHECK-CVT-NEXT:    mov w9, #-4096 // =0xfffff000
-; CHECK-CVT-NEXT:    cmn w8, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT:    csel w0, w8, w9, gt
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_i13_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs w9, h0
-; CHECK-FP16-NEXT:    mov w8, #4095 // =0xfff
-; CHECK-FP16-NEXT:    cmp w9, #4095
-; CHECK-FP16-NEXT:    csel w8, w9, w8, lt
-; CHECK-FP16-NEXT:    mov w9, #-4096 // =0xfffff000
-; CHECK-FP16-NEXT:    cmn w8, #1, lsl #12 // =4096
-; CHECK-FP16-NEXT:    csel w0, w8, w9, gt
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_i13_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov w8, #4095 // =0xfff
+; CHECK-SD-CVT-NEXT:    fcvtzs w9, s0
+; CHECK-SD-CVT-NEXT:    cmp w9, #4095
+; CHECK-SD-CVT-NEXT:    csel w8, w9, w8, lt
+; CHECK-SD-CVT-NEXT:    mov w9, #-4096 // =0xfffff000
+; CHECK-SD-CVT-NEXT:    cmn w8, #1, lsl #12 // =4096
+; CHECK-SD-CVT-NEXT:    csel w0, w8, w9, gt
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i13_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs w9, h0
+; CHECK-SD-FP16-NEXT:    mov w8, #4095 // =0xfff
+; CHECK-SD-FP16-NEXT:    cmp w9, #4095
+; CHECK-SD-FP16-NEXT:    csel w8, w9, w8, lt
+; CHECK-SD-FP16-NEXT:    mov w9, #-4096 // =0xfffff000
+; CHECK-SD-FP16-NEXT:    cmn w8, #1, lsl #12 // =4096
+; CHECK-SD-FP16-NEXT:    csel w0, w8, w9, gt
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i13_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov w8, #4095 // =0xfff
+; CHECK-GI-CVT-NEXT:    fcvtzs w9, s0
+; CHECK-GI-CVT-NEXT:    cmp w9, #4095
+; CHECK-GI-CVT-NEXT:    csel w8, w9, w8, lt
+; CHECK-GI-CVT-NEXT:    mov w9, #-4096 // =0xfffff000
+; CHECK-GI-CVT-NEXT:    cmn w8, #1, lsl #12 // =4096
+; CHECK-GI-CVT-NEXT:    csel w0, w8, w9, gt
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i13_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs w9, h0
+; CHECK-GI-FP16-NEXT:    mov w8, #4095 // =0xfff
+; CHECK-GI-FP16-NEXT:    cmp w9, #4095
+; CHECK-GI-FP16-NEXT:    csel w8, w9, w8, lt
+; CHECK-GI-FP16-NEXT:    mov w9, #-4096 // =0xfffff000
+; CHECK-GI-FP16-NEXT:    cmn w8, #1, lsl #12 // =4096
+; CHECK-GI-FP16-NEXT:    csel w0, w8, w9, gt
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i13 @llvm.fptosi.sat.i13.f16(half %f)
     ret i13 %x
 }
 
 define i16 @test_signed_i16_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i16_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov w9, #32767 // =0x7fff
-; CHECK-CVT-NEXT:    fcvtzs w8, s0
-; CHECK-CVT-NEXT:    cmp w8, w9
-; CHECK-CVT-NEXT:    csel w8, w8, w9, lt
-; CHECK-CVT-NEXT:    mov w9, #-32768 // =0xffff8000
-; CHECK-CVT-NEXT:    cmn w8, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    csel w0, w8, w9, gt
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_i16_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs w8, h0
-; CHECK-FP16-NEXT:    mov w9, #32767 // =0x7fff
-; CHECK-FP16-NEXT:    cmp w8, w9
-; CHECK-FP16-NEXT:    csel w8, w8, w9, lt
-; CHECK-FP16-NEXT:    mov w9, #-32768 // =0xffff8000
-; CHECK-FP16-NEXT:    cmn w8, #8, lsl #12 // =32768
-; CHECK-FP16-NEXT:    csel w0, w8, w9, gt
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_i16_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov w9, #32767 // =0x7fff
+; CHECK-SD-CVT-NEXT:    fcvtzs w8, s0
+; CHECK-SD-CVT-NEXT:    cmp w8, w9
+; CHECK-SD-CVT-NEXT:    csel w8, w8, w9, lt
+; CHECK-SD-CVT-NEXT:    mov w9, #-32768 // =0xffff8000
+; CHECK-SD-CVT-NEXT:    cmn w8, #8, lsl #12 // =32768
+; CHECK-SD-CVT-NEXT:    csel w0, w8, w9, gt
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i16_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs w8, h0
+; CHECK-SD-FP16-NEXT:    mov w9, #32767 // =0x7fff
+; CHECK-SD-FP16-NEXT:    cmp w8, w9
+; CHECK-SD-FP16-NEXT:    csel w8, w8, w9, lt
+; CHECK-SD-FP16-NEXT:    mov w9, #-32768 // =0xffff8000
+; CHECK-SD-FP16-NEXT:    cmn w8, #8, lsl #12 // =32768
+; CHECK-SD-FP16-NEXT:    csel w0, w8, w9, gt
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i16_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov w9, #32767 // =0x7fff
+; CHECK-GI-CVT-NEXT:    fcvtzs w8, s0
+; CHECK-GI-CVT-NEXT:    cmp w8, w9
+; CHECK-GI-CVT-NEXT:    csel w8, w8, w9, lt
+; CHECK-GI-CVT-NEXT:    mov w9, #-32768 // =0xffff8000
+; CHECK-GI-CVT-NEXT:    cmn w8, #8, lsl #12 // =32768
+; CHECK-GI-CVT-NEXT:    csel w0, w8, w9, gt
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i16_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs w8, h0
+; CHECK-GI-FP16-NEXT:    mov w9, #32767 // =0x7fff
+; CHECK-GI-FP16-NEXT:    cmp w8, w9
+; CHECK-GI-FP16-NEXT:    csel w8, w8, w9, lt
+; CHECK-GI-FP16-NEXT:    mov w9, #-32768 // =0xffff8000
+; CHECK-GI-FP16-NEXT:    cmn w8, #8, lsl #12 // =32768
+; CHECK-GI-FP16-NEXT:    csel w0, w8, w9, gt
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i16 @llvm.fptosi.sat.i16.f16(half %f)
     ret i16 %x
 }
 
 define i19 @test_signed_i19_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i19_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov w9, #262143 // =0x3ffff
-; CHECK-CVT-NEXT:    fcvtzs w8, s0
-; CHECK-CVT-NEXT:    cmp w8, w9
-; CHECK-CVT-NEXT:    csel w8, w8, w9, lt
-; CHECK-CVT-NEXT:    mov w9, #-262144 // =0xfffc0000
-; CHECK-CVT-NEXT:    cmn w8, #64, lsl #12 // =262144
-; CHECK-CVT-NEXT:    csel w0, w8, w9, gt
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_i19_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs w8, h0
-; CHECK-FP16-NEXT:    mov w9, #262143 // =0x3ffff
-; CHECK-FP16-NEXT:    cmp w8, w9
-; CHECK-FP16-NEXT:    csel w8, w8, w9, lt
-; CHECK-FP16-NEXT:    mov w9, #-262144 // =0xfffc0000
-; CHECK-FP16-NEXT:    cmn w8, #64, lsl #12 // =262144
-; CHECK-FP16-NEXT:    csel w0, w8, w9, gt
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_i19_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov w9, #262143 // =0x3ffff
+; CHECK-SD-CVT-NEXT:    fcvtzs w8, s0
+; CHECK-SD-CVT-NEXT:    cmp w8, w9
+; CHECK-SD-CVT-NEXT:    csel w8, w8, w9, lt
+; CHECK-SD-CVT-NEXT:    mov w9, #-262144 // =0xfffc0000
+; CHECK-SD-CVT-NEXT:    cmn w8, #64, lsl #12 // =262144
+; CHECK-SD-CVT-NEXT:    csel w0, w8, w9, gt
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i19_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs w8, h0
+; CHECK-SD-FP16-NEXT:    mov w9, #262143 // =0x3ffff
+; CHECK-SD-FP16-NEXT:    cmp w8, w9
+; CHECK-SD-FP16-NEXT:    csel w8, w8, w9, lt
+; CHECK-SD-FP16-NEXT:    mov w9, #-262144 // =0xfffc0000
+; CHECK-SD-FP16-NEXT:    cmn w8, #64, lsl #12 // =262144
+; CHECK-SD-FP16-NEXT:    csel w0, w8, w9, gt
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i19_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov w9, #262143 // =0x3ffff
+; CHECK-GI-CVT-NEXT:    fcvtzs w8, s0
+; CHECK-GI-CVT-NEXT:    cmp w8, w9
+; CHECK-GI-CVT-NEXT:    csel w8, w8, w9, lt
+; CHECK-GI-CVT-NEXT:    mov w9, #-262144 // =0xfffc0000
+; CHECK-GI-CVT-NEXT:    cmn w8, #64, lsl #12 // =262144
+; CHECK-GI-CVT-NEXT:    csel w0, w8, w9, gt
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i19_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs w8, h0
+; CHECK-GI-FP16-NEXT:    mov w9, #262143 // =0x3ffff
+; CHECK-GI-FP16-NEXT:    cmp w8, w9
+; CHECK-GI-FP16-NEXT:    csel w8, w8, w9, lt
+; CHECK-GI-FP16-NEXT:    mov w9, #-262144 // =0xfffc0000
+; CHECK-GI-FP16-NEXT:    cmn w8, #64, lsl #12 // =262144
+; CHECK-GI-FP16-NEXT:    csel w0, w8, w9, gt
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i19 @llvm.fptosi.sat.i19.f16(half %f)
     ret i19 %x
 }
 
 define i32 @test_signed_i32_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i32_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzs w0, s0
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_i32_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs w0, h0
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_i32_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvtzs w0, s0
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i32_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs w0, h0
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i32_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzs w0, s0
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i32_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs w0, h0
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i32 @llvm.fptosi.sat.i32.f16(half %f)
     ret i32 %x
 }
 
 define i50 @test_signed_i50_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i50_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
-; CHECK-CVT-NEXT:    fcvtzs x8, s0
-; CHECK-CVT-NEXT:    cmp x8, x9
-; CHECK-CVT-NEXT:    csel x8, x8, x9, lt
-; CHECK-CVT-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
-; CHECK-CVT-NEXT:    cmp x8, x9
-; CHECK-CVT-NEXT:    csel x0, x8, x9, gt
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_i50_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs x8, h0
-; CHECK-FP16-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
-; CHECK-FP16-NEXT:    cmp x8, x9
-; CHECK-FP16-NEXT:    csel x8, x8, x9, lt
-; CHECK-FP16-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
-; CHECK-FP16-NEXT:    cmp x8, x9
-; CHECK-FP16-NEXT:    csel x0, x8, x9, gt
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_i50_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-SD-CVT-NEXT:    fcvtzs x8, s0
+; CHECK-SD-CVT-NEXT:    cmp x8, x9
+; CHECK-SD-CVT-NEXT:    csel x8, x8, x9, lt
+; CHECK-SD-CVT-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
+; CHECK-SD-CVT-NEXT:    cmp x8, x9
+; CHECK-SD-CVT-NEXT:    csel x0, x8, x9, gt
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i50_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs x8, h0
+; CHECK-SD-FP16-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-SD-FP16-NEXT:    cmp x8, x9
+; CHECK-SD-FP16-NEXT:    csel x8, x8, x9, lt
+; CHECK-SD-FP16-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
+; CHECK-SD-FP16-NEXT:    cmp x8, x9
+; CHECK-SD-FP16-NEXT:    csel x0, x8, x9, gt
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i50_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-GI-CVT-NEXT:    mov x10, #-562949953421312 // =0xfffe000000000000
+; CHECK-GI-CVT-NEXT:    fcvtzs x8, s0
+; CHECK-GI-CVT-NEXT:    cmp x8, x9
+; CHECK-GI-CVT-NEXT:    csel x8, x8, x9, lt
+; CHECK-GI-CVT-NEXT:    cmp x8, x10
+; CHECK-GI-CVT-NEXT:    csel x0, x8, x10, gt
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i50_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs x8, h0
+; CHECK-GI-FP16-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-GI-FP16-NEXT:    mov x10, #-562949953421312 // =0xfffe000000000000
+; CHECK-GI-FP16-NEXT:    cmp x8, x9
+; CHECK-GI-FP16-NEXT:    csel x8, x8, x9, lt
+; CHECK-GI-FP16-NEXT:    cmp x8, x10
+; CHECK-GI-FP16-NEXT:    csel x0, x8, x10, gt
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i50 @llvm.fptosi.sat.i50.f16(half %f)
     ret i50 %x
 }
 
 define i64 @test_signed_i64_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i64_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzs x0, s0
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_i64_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs x0, h0
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_i64_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvtzs x0, s0
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i64_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i64_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzs x0, s0
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i64_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i64 @llvm.fptosi.sat.i64.f16(half %f)
     ret i64 %x
 }
 
 define i100 @test_signed_i100_f16(half %f) nounwind {
-; CHECK-LABEL: test_signed_i100_f16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    movi v0.2s, #241, lsl #24
-; CHECK-NEXT:    mov w8, #1895825407 // =0x70ffffff
-; CHECK-NEXT:    mov x10, #34359738367 // =0x7ffffffff
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov x8, #-34359738368 // =0xfffffff800000000
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    csel x8, x8, x1, lt
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    csel x8, x10, x8, gt
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    csel x0, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i100_f16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    movi v0.2s, #241, lsl #24
+; CHECK-SD-NEXT:    mov w8, #1895825407 // =0x70ffffff
+; CHECK-SD-NEXT:    mov x10, #34359738367 // =0x7ffffffff
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov x8, #-34359738368 // =0xfffffff800000000
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x8, x8, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    csel x8, x10, x8, gt
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    csel x0, xzr, x9, vs
+; CHECK-SD-NEXT:    csel x1, xzr, x8, vs
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i100_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov x1, xzr
+; CHECK-GI-CVT-NEXT:    fcvtzs x0, s0
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i100_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-GI-FP16-NEXT:    mov x1, xzr
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i100 @llvm.fptosi.sat.i100.f16(half %f)
     ret i100 %x
 }
 
 define i128 @test_signed_i128_f16(half %f) nounwind {
-; CHECK-LABEL: test_signed_i128_f16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    movi v0.2s, #255, lsl #24
-; CHECK-NEXT:    mov w8, #2130706431 // =0x7effffff
-; CHECK-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    csel x8, x8, x1, lt
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    csel x8, x10, x8, gt
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    csel x0, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i128_f16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    movi v0.2s, #255, lsl #24
+; CHECK-SD-NEXT:    mov w8, #2130706431 // =0x7effffff
+; CHECK-SD-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x8, x8, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    csel x8, x10, x8, gt
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    csel x0, xzr, x9, vs
+; CHECK-SD-NEXT:    csel x1, xzr, x8, vs
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i128_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov x1, xzr
+; CHECK-GI-CVT-NEXT:    fcvtzs x0, s0
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i128_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-GI-FP16-NEXT:    mov x1, xzr
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i128 @llvm.fptosi.sat.i128.f16(half %f)
     ret i128 %x
 }
+
+define i32 @test_signed_f128_i32(fp128 %f) {
+; CHECK-SD-LABEL: test_signed_f128_i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #32
+; CHECK-SD-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    adrp x8, .LCPI30_0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI30_0]
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixtfsi
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    mov w8, #-2147483648 // =0x80000000
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel w19, w8, w0, lt
+; CHECK-SD-NEXT:    adrp x8, .LCPI30_1
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI30_1]
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w8, #2147483647 // =0x7fffffff
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w19, w8, w19, gt
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    bl __unordtf2
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w0, wzr, w19, ne
+; CHECK-SD-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    add sp, sp, #32
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_f128_i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #64
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    .cfi_offset b8, -24
+; CHECK-GI-NEXT:    .cfi_offset b9, -32
+; CHECK-GI-NEXT:    adrp x8, .LCPI30_1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI30_1]
+; CHECK-GI-NEXT:    stp q1, q0, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldp q3, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d0, v2.d[1]
+; CHECK-GI-NEXT:    mov d1, v3.d[1]
+; CHECK-GI-NEXT:    fcsel d8, d2, d3, lt
+; CHECK-GI-NEXT:    fmov x8, d8
+; CHECK-GI-NEXT:    fcsel d9, d0, d1, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d9
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    adrp x8, .LCPI30_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI30_0]
+; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d0, v1.d[1]
+; CHECK-GI-NEXT:    fcsel d1, d8, d1, gt
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    fcsel d2, d9, d0, gt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d2
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixtfsi
+; CHECK-GI-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w19, w0
+; CHECK-GI-NEXT:    mov v1.16b, v0.16b
+; CHECK-GI-NEXT:    bl __unordtf2
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csel w0, wzr, w19, ne
+; CHECK-GI-NEXT:    ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    add sp, sp, #64
+; CHECK-GI-NEXT:    ret
+    %x = call i32 @llvm.fptosi.sat.i32.f128(fp128 %f)
+    ret i32 %x
+}
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index 4626fd7f2b3dd9..ed7814938da25f 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -1,6 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
+; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
+
+; CHECK-GI:       warning: Instruction selection used fallback path for test_signed_v4f32_v4i50
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_signed_v4f16_v4i50
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_signed_v8f16_v8i19
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_signed_v8f16_v8i50
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_signed_v8f16_v8i128
 
 ;
 ; Float to signed 32-bit -- Vector size variation
@@ -16,10 +24,17 @@ declare <7 x i32> @llvm.fptosi.sat.v7f32.v7i32 (<7 x float>)
 declare <8 x i32> @llvm.fptosi.sat.v8f32.v8i32 (<8 x float>)
 
 define <1 x i32> @test_signed_v1f32_v1i32(<1 x float> %f) {
-; CHECK-LABEL: test_signed_v1f32_v1i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v1f32_v1i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v1f32_v1i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs w8, s0
+; CHECK-GI-NEXT:    mov v0.s[0], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptosi.sat.v1f32.v1i32(<1 x float> %f)
     ret <1 x i32> %x
 }
@@ -52,79 +67,157 @@ define <4 x i32> @test_signed_v4f32_v4i32(<4 x float> %f) {
 }
 
 define <5 x i32> @test_signed_v5f32_v5i32(<5 x float> %f) {
-; CHECK-LABEL: test_signed_v5f32_v5i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-NEXT:    // kill: def $s3 killed $s3 def $q3
-; CHECK-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-NEXT:    fcvtzs v4.4s, v4.4s
-; CHECK-NEXT:    mov v0.s[2], v2.s[0]
-; CHECK-NEXT:    fmov w4, s4
-; CHECK-NEXT:    mov v0.s[3], v3.s[0]
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    mov w1, v0.s[1]
-; CHECK-NEXT:    mov w2, v0.s[2]
-; CHECK-NEXT:    mov w3, v0.s[3]
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v5f32_v5i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    fcvtzs v4.4s, v4.4s
+; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-SD-NEXT:    fmov w4, s4
+; CHECK-SD-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    mov w1, v0.s[1]
+; CHECK-SD-NEXT:    mov w2, v0.s[2]
+; CHECK-SD-NEXT:    mov w3, v0.s[3]
+; CHECK-SD-NEXT:    fmov w0, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v5f32_v5i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v4.4s
+; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT:    fmov w4, s1
+; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    mov s2, v0.s[1]
+; CHECK-GI-NEXT:    mov s3, v0.s[2]
+; CHECK-GI-NEXT:    mov s4, v0.s[3]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    fmov w1, s2
+; CHECK-GI-NEXT:    fmov w2, s3
+; CHECK-GI-NEXT:    fmov w3, s4
+; CHECK-GI-NEXT:    ret
     %x = call <5 x i32> @llvm.fptosi.sat.v5f32.v5i32(<5 x float> %f)
     ret <5 x i32> %x
 }
 
 define <6 x i32> @test_signed_v6f32_v6i32(<6 x float> %f) {
-; CHECK-LABEL: test_signed_v6f32_v6i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-NEXT:    // kill: def $s3 killed $s3 def $q3
-; CHECK-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-NEXT:    mov v4.s[1], v5.s[0]
-; CHECK-NEXT:    mov v0.s[2], v2.s[0]
-; CHECK-NEXT:    fcvtzs v1.4s, v4.4s
-; CHECK-NEXT:    mov v0.s[3], v3.s[0]
-; CHECK-NEXT:    mov w5, v1.s[1]
-; CHECK-NEXT:    fmov w4, s1
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    mov w1, v0.s[1]
-; CHECK-NEXT:    mov w2, v0.s[2]
-; CHECK-NEXT:    mov w3, v0.s[3]
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v6f32_v6i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v4.4s
+; CHECK-SD-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-SD-NEXT:    mov w5, v1.s[1]
+; CHECK-SD-NEXT:    fmov w4, s1
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    mov w1, v0.s[1]
+; CHECK-SD-NEXT:    mov w2, v0.s[2]
+; CHECK-SD-NEXT:    mov w3, v0.s[3]
+; CHECK-SD-NEXT:    fmov w0, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v6f32_v6i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v4.4s
+; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-GI-NEXT:    mov s4, v1.s[1]
+; CHECK-GI-NEXT:    fmov w4, s1
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    fmov w5, s4
+; CHECK-GI-NEXT:    mov s2, v0.s[1]
+; CHECK-GI-NEXT:    mov s3, v0.s[2]
+; CHECK-GI-NEXT:    mov s5, v0.s[3]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    fmov w1, s2
+; CHECK-GI-NEXT:    fmov w2, s3
+; CHECK-GI-NEXT:    fmov w3, s5
+; CHECK-GI-NEXT:    ret
     %x = call <6 x i32> @llvm.fptosi.sat.v6f32.v6i32(<6 x float> %f)
     ret <6 x i32> %x
 }
 
 define <7 x i32> @test_signed_v7f32_v7i32(<7 x float> %f) {
-; CHECK-LABEL: test_signed_v7f32_v7i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-NEXT:    // kill: def $s6 killed $s6 def $q6
-; CHECK-NEXT:    // kill: def $s3 killed $s3 def $q3
-; CHECK-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-NEXT:    mov v4.s[1], v5.s[0]
-; CHECK-NEXT:    mov v0.s[2], v2.s[0]
-; CHECK-NEXT:    mov v4.s[2], v6.s[0]
-; CHECK-NEXT:    mov v0.s[3], v3.s[0]
-; CHECK-NEXT:    fcvtzs v1.4s, v4.4s
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    mov w5, v1.s[1]
-; CHECK-NEXT:    mov w6, v1.s[2]
-; CHECK-NEXT:    fmov w4, s1
-; CHECK-NEXT:    mov w1, v0.s[1]
-; CHECK-NEXT:    mov w2, v0.s[2]
-; CHECK-NEXT:    mov w3, v0.s[3]
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v7f32_v7i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-SD-NEXT:    // kill: def $s6 killed $s6 def $q6
+; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-SD-NEXT:    mov v4.s[2], v6.s[0]
+; CHECK-SD-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v4.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    mov w5, v1.s[1]
+; CHECK-SD-NEXT:    mov w6, v1.s[2]
+; CHECK-SD-NEXT:    fmov w4, s1
+; CHECK-SD-NEXT:    mov w1, v0.s[1]
+; CHECK-SD-NEXT:    mov w2, v0.s[2]
+; CHECK-SD-NEXT:    mov w3, v0.s[3]
+; CHECK-SD-NEXT:    fmov w0, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v7f32_v7i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-GI-NEXT:    // kill: def $s6 killed $s6 def $q6
+; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT:    mov v4.s[2], v6.s[0]
+; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v4.4s
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    mov s5, v1.s[1]
+; CHECK-GI-NEXT:    mov s6, v1.s[2]
+; CHECK-GI-NEXT:    fmov w4, s1
+; CHECK-GI-NEXT:    mov s2, v0.s[1]
+; CHECK-GI-NEXT:    mov s3, v0.s[2]
+; CHECK-GI-NEXT:    mov s4, v0.s[3]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    fmov w5, s5
+; CHECK-GI-NEXT:    fmov w6, s6
+; CHECK-GI-NEXT:    fmov w1, s2
+; CHECK-GI-NEXT:    fmov w2, s3
+; CHECK-GI-NEXT:    fmov w3, s4
+; CHECK-GI-NEXT:    ret
     %x = call <7 x i32> @llvm.fptosi.sat.v7f32.v7i32(<7 x float> %f)
     ret <7 x i32> %x
 }
@@ -151,86 +244,238 @@ declare <5 x i32> @llvm.fptosi.sat.v5f64.v5i32 (<5 x double>)
 declare <6 x i32> @llvm.fptosi.sat.v6f64.v6i32 (<6 x double>)
 
 define <1 x i32> @test_signed_v1f64_v1i32(<1 x double> %f) {
-; CHECK-LABEL: test_signed_v1f64_v1i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs w8, d0
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v1f64_v1i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs w8, d0
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v1f64_v1i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs w8, d0
+; CHECK-GI-NEXT:    mov v0.s[0], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptosi.sat.v1f64.v1i32(<1 x double> %f)
     ret <1 x i32> %x
 }
 
 define <2 x i32> @test_signed_v2f64_v2i32(<2 x double> %f) {
-; CHECK-LABEL: test_signed_v2f64_v2i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    fcvtzs w8, d0
-; CHECK-NEXT:    fcvtzs w9, d1
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f64_v2i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d1, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzs w8, d0
+; CHECK-SD-NEXT:    fcvtzs w9, d1
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f64_v2i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    adrp x8, .LCPI9_1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI9_1]
+; CHECK-GI-NEXT:    adrp x8, .LCPI9_0
+; CHECK-GI-NEXT:    cmgt v2.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI9_0]
+; CHECK-GI-NEXT:    cmgt v2.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i32> @llvm.fptosi.sat.v2f64.v2i32(<2 x double> %f)
     ret <2 x i32> %x
 }
 
 define <3 x i32> @test_signed_v3f64_v3i32(<3 x double> %f) {
-; CHECK-LABEL: test_signed_v3f64_v3i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs w8, d0
-; CHECK-NEXT:    fcvtzs w9, d1
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    fcvtzs w8, d2
-; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    mov v0.s[2], w8
-; CHECK-NEXT:    fcvtzs w8, d0
-; CHECK-NEXT:    mov v0.s[3], w8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v3f64_v3i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs w8, d0
+; CHECK-SD-NEXT:    fcvtzs w9, d1
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    fcvtzs w8, d2
+; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    mov v0.s[2], w8
+; CHECK-SD-NEXT:    fcvtzs w8, d0
+; CHECK-SD-NEXT:    mov v0.s[3], w8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v3f64_v3i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    adrp x8, .LCPI10_1
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v2.2d
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI10_1]
+; CHECK-GI-NEXT:    adrp x8, .LCPI10_0
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    cmgt v4.2d, v2.2d, v1.2d
+; CHECK-GI-NEXT:    bif v1.16b, v2.16b, v4.16b
+; CHECK-GI-NEXT:    cmgt v3.2d, v2.2d, v0.2d
+; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v3.16b
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI10_0]
+; CHECK-GI-NEXT:    cmgt v4.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT:    cmgt v3.2d, v0.2d, v2.2d
+; CHECK-GI-NEXT:    bif v1.16b, v2.16b, v4.16b
+; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v3.16b
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    ret
     %x = call <3 x i32> @llvm.fptosi.sat.v3f64.v3i32(<3 x double> %f)
     ret <3 x i32> %x
 }
 
 define <4 x i32> @test_signed_v4f64_v4i32(<4 x double> %f) {
-; CHECK-LABEL: test_signed_v4f64_v4i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d2, v0.d[1]
-; CHECK-NEXT:    fcvtzs w8, d0
-; CHECK-NEXT:    fcvtzs w9, d2
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    fcvtzs w8, d1
-; CHECK-NEXT:    mov d1, v1.d[1]
-; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    mov v0.s[2], w8
-; CHECK-NEXT:    fcvtzs w8, d1
-; CHECK-NEXT:    mov v0.s[3], w8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v4f64_v4i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d2, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzs w8, d0
+; CHECK-SD-NEXT:    fcvtzs w9, d2
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    fcvtzs w8, d1
+; CHECK-SD-NEXT:    mov d1, v1.d[1]
+; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    mov v0.s[2], w8
+; CHECK-SD-NEXT:    fcvtzs w8, d1
+; CHECK-SD-NEXT:    mov v0.s[3], w8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v4f64_v4i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    adrp x8, .LCPI11_1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI11_1]
+; CHECK-GI-NEXT:    adrp x8, .LCPI11_0
+; CHECK-GI-NEXT:    cmgt v3.2d, v2.2d, v0.2d
+; CHECK-GI-NEXT:    cmgt v4.2d, v2.2d, v1.2d
+; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v3.16b
+; CHECK-GI-NEXT:    bif v1.16b, v2.16b, v4.16b
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI11_0]
+; CHECK-GI-NEXT:    cmgt v3.2d, v0.2d, v2.2d
+; CHECK-GI-NEXT:    cmgt v4.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v3.16b
+; CHECK-GI-NEXT:    bif v1.16b, v2.16b, v4.16b
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    ret
     %x = call <4 x i32> @llvm.fptosi.sat.v4f64.v4i32(<4 x double> %f)
     ret <4 x i32> %x
 }
 
 define <5 x i32> @test_signed_v5f64_v5i32(<5 x double> %f) {
-; CHECK-LABEL: test_signed_v5f64_v5i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs w0, d0
-; CHECK-NEXT:    fcvtzs w1, d1
-; CHECK-NEXT:    fcvtzs w2, d2
-; CHECK-NEXT:    fcvtzs w3, d3
-; CHECK-NEXT:    fcvtzs w4, d4
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v5f64_v5i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs w0, d0
+; CHECK-SD-NEXT:    fcvtzs w1, d1
+; CHECK-SD-NEXT:    fcvtzs w2, d2
+; CHECK-SD-NEXT:    fcvtzs w3, d3
+; CHECK-SD-NEXT:    fcvtzs w4, d4
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v5f64_v5i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    adrp x8, .LCPI12_1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v2.2d
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI12_1]
+; CHECK-GI-NEXT:    adrp x8, .LCPI12_0
+; CHECK-GI-NEXT:    cmgt v4.2d, v2.2d, v0.2d
+; CHECK-GI-NEXT:    cmgt v5.2d, v2.2d, v1.2d
+; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v4.16b
+; CHECK-GI-NEXT:    bif v1.16b, v2.16b, v5.16b
+; CHECK-GI-NEXT:    cmgt v4.2d, v2.2d, v3.2d
+; CHECK-GI-NEXT:    ldr q5, [x8, :lo12:.LCPI12_0]
+; CHECK-GI-NEXT:    bit v2.16b, v3.16b, v4.16b
+; CHECK-GI-NEXT:    cmgt v3.2d, v0.2d, v5.2d
+; CHECK-GI-NEXT:    cmgt v4.2d, v1.2d, v5.2d
+; CHECK-GI-NEXT:    bif v0.16b, v5.16b, v3.16b
+; CHECK-GI-NEXT:    bif v1.16b, v5.16b, v4.16b
+; CHECK-GI-NEXT:    cmgt v3.2d, v2.2d, v5.2d
+; CHECK-GI-NEXT:    bif v2.16b, v5.16b, v3.16b
+; CHECK-GI-NEXT:    mov d3, v0.d[1]
+; CHECK-GI-NEXT:    mov d4, v1.d[1]
+; CHECK-GI-NEXT:    fmov x0, d0
+; CHECK-GI-NEXT:    fmov x2, d1
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
+; CHECK-GI-NEXT:    fmov x4, d2
+; CHECK-GI-NEXT:    fmov x1, d3
+; CHECK-GI-NEXT:    fmov x3, d4
+; CHECK-GI-NEXT:    // kill: def $w4 killed $w4 killed $x4
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
+; CHECK-GI-NEXT:    // kill: def $w3 killed $w3 killed $x3
+; CHECK-GI-NEXT:    ret
     %x = call <5 x i32> @llvm.fptosi.sat.v5f64.v5i32(<5 x double> %f)
     ret <5 x i32> %x
 }
 
 define <6 x i32> @test_signed_v6f64_v6i32(<6 x double> %f) {
-; CHECK-LABEL: test_signed_v6f64_v6i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs w0, d0
-; CHECK-NEXT:    fcvtzs w1, d1
-; CHECK-NEXT:    fcvtzs w2, d2
-; CHECK-NEXT:    fcvtzs w3, d3
-; CHECK-NEXT:    fcvtzs w4, d4
-; CHECK-NEXT:    fcvtzs w5, d5
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v6f64_v6i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs w0, d0
+; CHECK-SD-NEXT:    fcvtzs w1, d1
+; CHECK-SD-NEXT:    fcvtzs w2, d2
+; CHECK-SD-NEXT:    fcvtzs w3, d3
+; CHECK-SD-NEXT:    fcvtzs w4, d4
+; CHECK-SD-NEXT:    fcvtzs w5, d5
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v6f64_v6i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-GI-NEXT:    adrp x8, .LCPI13_1
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
+; CHECK-GI-NEXT:    mov v4.d[1], v5.d[0]
+; CHECK-GI-NEXT:    ldr q3, [x8, :lo12:.LCPI13_1]
+; CHECK-GI-NEXT:    adrp x8, .LCPI13_0
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v4.2d
+; CHECK-GI-NEXT:    cmgt v4.2d, v3.2d, v0.2d
+; CHECK-GI-NEXT:    cmgt v5.2d, v3.2d, v1.2d
+; CHECK-GI-NEXT:    cmgt v6.2d, v3.2d, v2.2d
+; CHECK-GI-NEXT:    bif v0.16b, v3.16b, v4.16b
+; CHECK-GI-NEXT:    bif v1.16b, v3.16b, v5.16b
+; CHECK-GI-NEXT:    bif v2.16b, v3.16b, v6.16b
+; CHECK-GI-NEXT:    ldr q3, [x8, :lo12:.LCPI13_0]
+; CHECK-GI-NEXT:    cmgt v4.2d, v0.2d, v3.2d
+; CHECK-GI-NEXT:    cmgt v5.2d, v1.2d, v3.2d
+; CHECK-GI-NEXT:    cmgt v6.2d, v2.2d, v3.2d
+; CHECK-GI-NEXT:    bif v0.16b, v3.16b, v4.16b
+; CHECK-GI-NEXT:    bif v1.16b, v3.16b, v5.16b
+; CHECK-GI-NEXT:    bif v2.16b, v3.16b, v6.16b
+; CHECK-GI-NEXT:    mov d3, v0.d[1]
+; CHECK-GI-NEXT:    mov d4, v1.d[1]
+; CHECK-GI-NEXT:    mov d5, v2.d[1]
+; CHECK-GI-NEXT:    fmov x0, d0
+; CHECK-GI-NEXT:    fmov x2, d1
+; CHECK-GI-NEXT:    fmov x4, d2
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
+; CHECK-GI-NEXT:    // kill: def $w4 killed $w4 killed $x4
+; CHECK-GI-NEXT:    fmov x1, d3
+; CHECK-GI-NEXT:    fmov x3, d4
+; CHECK-GI-NEXT:    fmov x5, d5
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
+; CHECK-GI-NEXT:    // kill: def $w3 killed $w3 killed $x3
+; CHECK-GI-NEXT:    // kill: def $w5 killed $w5 killed $x5
+; CHECK-GI-NEXT:    ret
     %x = call <6 x i32> @llvm.fptosi.sat.v6f64.v6i32(<6 x double> %f)
     ret <6 x i32> %x
 }
@@ -245,308 +490,727 @@ declare <3 x i32> @llvm.fptosi.sat.v3f128.v3i32 (<3 x fp128>)
 declare <4 x i32> @llvm.fptosi.sat.v4f128.v4i32 (<4 x fp128>)
 
 define <1 x i32> @test_signed_v1f128_v1i32(<1 x fp128> %f) {
-; CHECK-LABEL: test_signed_v1f128_v1i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    adrp x8, .LCPI14_0
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixtfsi
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    csel w19, w8, w0, lt
-; CHECK-NEXT:    adrp x8, .LCPI14_1
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI14_1]
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w8, #2147483647 // =0x7fffffff
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w19, w8, w19, gt
-; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    bl __unordtf2
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w8, wzr, w19, ne
-; CHECK-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    add sp, sp, #32
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v1f128_v1i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #32
+; CHECK-SD-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    adrp x8, .LCPI14_0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixtfsi
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    mov w8, #-2147483648 // =0x80000000
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel w19, w8, w0, lt
+; CHECK-SD-NEXT:    adrp x8, .LCPI14_1
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI14_1]
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w8, #2147483647 // =0x7fffffff
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w19, w8, w19, gt
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    bl __unordtf2
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w8, wzr, w19, ne
+; CHECK-SD-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    add sp, sp, #32
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v1f128_v1i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #64
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    .cfi_offset b8, -24
+; CHECK-GI-NEXT:    .cfi_offset b9, -32
+; CHECK-GI-NEXT:    adrp x8, .LCPI14_1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI14_1]
+; CHECK-GI-NEXT:    stp q1, q0, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldp q3, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d0, v2.d[1]
+; CHECK-GI-NEXT:    mov d1, v3.d[1]
+; CHECK-GI-NEXT:    fcsel d8, d2, d3, lt
+; CHECK-GI-NEXT:    fmov x8, d8
+; CHECK-GI-NEXT:    fcsel d9, d0, d1, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d9
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    adrp x8, .LCPI14_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
+; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d0, v1.d[1]
+; CHECK-GI-NEXT:    fcsel d1, d8, d1, gt
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    fcsel d2, d9, d0, gt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d2
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixtfsi
+; CHECK-GI-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w19, w0
+; CHECK-GI-NEXT:    mov v1.16b, v0.16b
+; CHECK-GI-NEXT:    bl __unordtf2
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csel w8, wzr, w19, ne
+; CHECK-GI-NEXT:    ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v0.s[0], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    add sp, sp, #64
+; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptosi.sat.v1f128.v1i32(<1 x fp128> %f)
     ret <1 x i32> %x
 }
 
 define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) {
-; CHECK-LABEL: test_signed_v2f128_v2i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #112
-; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w22, -32
-; CHECK-NEXT:    .cfi_offset w30, -48
-; CHECK-NEXT:    mov v2.16b, v1.16b
-; CHECK-NEXT:    stp q1, q0, [sp, #32] // 32-byte Folded Spill
-; CHECK-NEXT:    adrp x8, .LCPI15_0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI15_0]
-; CHECK-NEXT:    mov v0.16b, v2.16b
-; CHECK-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixtfsi
-; CHECK-NEXT:    adrp x8, .LCPI15_1
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI15_1]
-; CHECK-NEXT:    mov w20, #-2147483648 // =0x80000000
-; CHECK-NEXT:    csel w19, w20, w0, lt
-; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w21, #2147483647 // =0x7fffffff
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w19, w21, w19, gt
-; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    bl __unordtf2
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w22, wzr, w19, ne
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixtfsi
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    csel w19, w20, w0, lt
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w19, w21, w19, gt
-; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    bl __unordtf2
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-NEXT:    csel w8, wzr, w19, ne
-; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w22
-; CHECK-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    add sp, sp, #112
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f128_v2i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #112
+; CHECK-SD-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w30, -48
+; CHECK-SD-NEXT:    mov v2.16b, v1.16b
+; CHECK-SD-NEXT:    stp q1, q0, [sp, #32] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    adrp x8, .LCPI15_0
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI15_0]
+; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixtfsi
+; CHECK-SD-NEXT:    adrp x8, .LCPI15_1
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI15_1]
+; CHECK-SD-NEXT:    mov w20, #-2147483648 // =0x80000000
+; CHECK-SD-NEXT:    csel w19, w20, w0, lt
+; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w21, #2147483647 // =0x7fffffff
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w19, w21, w19, gt
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    bl __unordtf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w22, wzr, w19, ne
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixtfsi
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    csel w19, w20, w0, lt
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w19, w21, w19, gt
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    bl __unordtf2
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csel w8, wzr, w19, ne
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w22
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    add sp, sp, #112
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f128_v2i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #128
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w30, -32
+; CHECK-GI-NEXT:    .cfi_offset b8, -40
+; CHECK-GI-NEXT:    .cfi_offset b9, -48
+; CHECK-GI-NEXT:    .cfi_offset b10, -56
+; CHECK-GI-NEXT:    .cfi_offset b11, -64
+; CHECK-GI-NEXT:    adrp x8, .LCPI15_1
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI15_1]
+; CHECK-GI-NEXT:    stp q2, q1, [sp, #32] // 32-byte Folded Spill
+; CHECK-GI-NEXT:    mov v1.16b, v2.16b
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d0, v2.d[1]
+; CHECK-GI-NEXT:    mov d8, v1.d[1]
+; CHECK-GI-NEXT:    fcsel d9, d2, d1, lt
+; CHECK-GI-NEXT:    fmov x8, d9
+; CHECK-GI-NEXT:    fcsel d10, d0, d8, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d10
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    adrp x8, .LCPI15_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI15_0]
+; CHECK-GI-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d11, v0.d[1]
+; CHECK-GI-NEXT:    fcsel d0, d9, d0, gt
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fcsel d1, d10, d11, gt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixtfsi
+; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w19, w0
+; CHECK-GI-NEXT:    mov v1.16b, v0.16b
+; CHECK-GI-NEXT:    bl __unordtf2
+; CHECK-GI-NEXT:    ldp q1, q0, [sp, #32] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    csel w20, wzr, w19, ne
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldp q2, q1, [sp, #32] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d0, v1.d[1]
+; CHECK-GI-NEXT:    fcsel d9, d1, d2, lt
+; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    fmov x8, d9
+; CHECK-GI-NEXT:    fcsel d8, d0, d8, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d8
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    fcsel d1, d8, d11, gt
+; CHECK-GI-NEXT:    fcsel d0, d9, d0, gt
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixtfsi
+; CHECK-GI-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w19, w0
+; CHECK-GI-NEXT:    mov v1.16b, v0.16b
+; CHECK-GI-NEXT:    bl __unordtf2
+; CHECK-GI-NEXT:    mov v0.s[0], w20
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    csel w8, wzr, w19, ne
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v0.s[1], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    add sp, sp, #128
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i32> @llvm.fptosi.sat.v2f128.v2i32(<2 x fp128> %f)
     ret <2 x i32> %x
 }
 
 define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
-; CHECK-LABEL: test_signed_v3f128_v3i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #128
-; CHECK-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 128
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w22, -32
-; CHECK-NEXT:    .cfi_offset w30, -48
-; CHECK-NEXT:    stp q0, q2, [sp, #48] // 32-byte Folded Spill
-; CHECK-NEXT:    mov v2.16b, v1.16b
-; CHECK-NEXT:    adrp x8, .LCPI16_0
-; CHECK-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI16_0]
-; CHECK-NEXT:    mov v0.16b, v2.16b
-; CHECK-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixtfsi
-; CHECK-NEXT:    adrp x8, .LCPI16_1
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI16_1]
-; CHECK-NEXT:    mov w20, #-2147483648 // =0x80000000
-; CHECK-NEXT:    csel w19, w20, w0, lt
-; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w21, #2147483647 // =0x7fffffff
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w19, w21, w19, gt
-; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    bl __unordtf2
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w22, wzr, w19, ne
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixtfsi
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    csel w19, w20, w0, lt
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w19, w21, w19, gt
-; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    bl __unordtf2
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    csel w8, wzr, w19, ne
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w22
-; CHECK-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixtfsi
-; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    csel w19, w20, w0, lt
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w19, w21, w19, gt
-; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    bl __unordtf2
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
-; CHECK-NEXT:    csel w8, wzr, w19, ne
-; CHECK-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x22, x21, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    mov v0.s[2], w8
-; CHECK-NEXT:    add sp, sp, #128
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v3f128_v3i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #128
+; CHECK-SD-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w30, -48
+; CHECK-SD-NEXT:    stp q0, q2, [sp, #48] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    mov v2.16b, v1.16b
+; CHECK-SD-NEXT:    adrp x8, .LCPI16_0
+; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI16_0]
+; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixtfsi
+; CHECK-SD-NEXT:    adrp x8, .LCPI16_1
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI16_1]
+; CHECK-SD-NEXT:    mov w20, #-2147483648 // =0x80000000
+; CHECK-SD-NEXT:    csel w19, w20, w0, lt
+; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w21, #2147483647 // =0x7fffffff
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w19, w21, w19, gt
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    bl __unordtf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w22, wzr, w19, ne
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixtfsi
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    csel w19, w20, w0, lt
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w19, w21, w19, gt
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    bl __unordtf2
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel w8, wzr, w19, ne
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w22
+; CHECK-SD-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixtfsi
+; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    csel w19, w20, w0, lt
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w19, w21, w19, gt
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    bl __unordtf2
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csel w8, wzr, w19, ne
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #96] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v0.s[2], w8
+; CHECK-SD-NEXT:    add sp, sp, #128
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v3f128_v3i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #144
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x30, x21, [sp, #112] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #128] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 144
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w21, -24
+; CHECK-GI-NEXT:    .cfi_offset w30, -32
+; CHECK-GI-NEXT:    .cfi_offset b8, -40
+; CHECK-GI-NEXT:    .cfi_offset b9, -48
+; CHECK-GI-NEXT:    .cfi_offset b10, -56
+; CHECK-GI-NEXT:    .cfi_offset b11, -64
+; CHECK-GI-NEXT:    adrp x8, .LCPI16_1
+; CHECK-GI-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI16_1]
+; CHECK-GI-NEXT:    str q2, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d0, v2.d[1]
+; CHECK-GI-NEXT:    mov d8, v1.d[1]
+; CHECK-GI-NEXT:    fcsel d10, d2, d1, lt
+; CHECK-GI-NEXT:    fmov x8, d10
+; CHECK-GI-NEXT:    fcsel d11, d0, d8, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d11
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    adrp x8, .LCPI16_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI16_0]
+; CHECK-GI-NEXT:    str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d9, v0.d[1]
+; CHECK-GI-NEXT:    fcsel d0, d10, d0, gt
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fcsel d1, d11, d9, gt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixtfsi
+; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w19, w0
+; CHECK-GI-NEXT:    mov v1.16b, v0.16b
+; CHECK-GI-NEXT:    bl __unordtf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    csel w20, wzr, w19, ne
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q2, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d0, v1.d[1]
+; CHECK-GI-NEXT:    fcsel d10, d1, d2, lt
+; CHECK-GI-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    fmov x8, d10
+; CHECK-GI-NEXT:    fcsel d11, d0, d8, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d11
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    fcsel d1, d11, d9, gt
+; CHECK-GI-NEXT:    fcsel d0, d10, d0, gt
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixtfsi
+; CHECK-GI-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w19, w0
+; CHECK-GI-NEXT:    mov v1.16b, v0.16b
+; CHECK-GI-NEXT:    bl __unordtf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    csel w21, wzr, w19, ne
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldp q3, q1, [sp, #32] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    ldr q2, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov d0, v3.d[1]
+; CHECK-GI-NEXT:    fcsel d10, d3, d2, lt
+; CHECK-GI-NEXT:    fmov x8, d10
+; CHECK-GI-NEXT:    fcsel d8, d0, d8, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d8
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    fcsel d1, d8, d9, gt
+; CHECK-GI-NEXT:    fcsel d0, d10, d0, gt
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixtfsi
+; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w19, w0
+; CHECK-GI-NEXT:    mov v1.16b, v0.16b
+; CHECK-GI-NEXT:    bl __unordtf2
+; CHECK-GI-NEXT:    mov v0.s[0], w20
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    csel w8, wzr, w19, ne
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #128] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #96] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v0.s[1], w21
+; CHECK-GI-NEXT:    ldp x30, x21, [sp, #112] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v0.s[2], w8
+; CHECK-GI-NEXT:    add sp, sp, #144
+; CHECK-GI-NEXT:    ret
     %x = call <3 x i32> @llvm.fptosi.sat.v3f128.v3i32(<3 x fp128> %f)
     ret <3 x i32> %x
 }
 
 define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) {
-; CHECK-LABEL: test_signed_v4f128_v4i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #144
-; CHECK-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #112] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #128] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 144
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w22, -32
-; CHECK-NEXT:    .cfi_offset w30, -48
-; CHECK-NEXT:    stp q2, q3, [sp, #64] // 32-byte Folded Spill
-; CHECK-NEXT:    mov v2.16b, v1.16b
-; CHECK-NEXT:    adrp x8, .LCPI17_0
-; CHECK-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI17_0]
-; CHECK-NEXT:    mov v0.16b, v2.16b
-; CHECK-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixtfsi
-; CHECK-NEXT:    adrp x8, .LCPI17_1
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI17_1]
-; CHECK-NEXT:    mov w20, #-2147483648 // =0x80000000
-; CHECK-NEXT:    csel w19, w20, w0, lt
-; CHECK-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w21, #2147483647 // =0x7fffffff
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w19, w21, w19, gt
-; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    bl __unordtf2
-; CHECK-NEXT:    ldp q1, q0, [sp, #32] // 32-byte Folded Reload
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w22, wzr, w19, ne
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixtfsi
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    csel w19, w20, w0, lt
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w19, w21, w19, gt
-; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    bl __unordtf2
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    csel w8, wzr, w19, ne
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w22
-; CHECK-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixtfsi
-; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    csel w19, w20, w0, lt
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w19, w21, w19, gt
-; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    bl __unordtf2
-; CHECK-NEXT:    ldp q1, q0, [sp, #32] // 32-byte Folded Reload
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w8, wzr, w19, ne
-; CHECK-NEXT:    mov v0.s[2], w8
-; CHECK-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixtfsi
-; CHECK-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    csel w19, w20, w0, lt
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w19, w21, w19, gt
-; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    bl __unordtf2
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
-; CHECK-NEXT:    csel w8, wzr, w19, ne
-; CHECK-NEXT:    ldp x20, x19, [sp, #128] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x22, x21, [sp, #112] // 16-byte Folded Reload
-; CHECK-NEXT:    mov v0.s[3], w8
-; CHECK-NEXT:    add sp, sp, #144
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v4f128_v4i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #144
+; CHECK-SD-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #112] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #128] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 144
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w30, -48
+; CHECK-SD-NEXT:    stp q2, q3, [sp, #64] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    mov v2.16b, v1.16b
+; CHECK-SD-NEXT:    adrp x8, .LCPI17_0
+; CHECK-SD-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI17_0]
+; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixtfsi
+; CHECK-SD-NEXT:    adrp x8, .LCPI17_1
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI17_1]
+; CHECK-SD-NEXT:    mov w20, #-2147483648 // =0x80000000
+; CHECK-SD-NEXT:    csel w19, w20, w0, lt
+; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w21, #2147483647 // =0x7fffffff
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w19, w21, w19, gt
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    bl __unordtf2
+; CHECK-SD-NEXT:    ldp q1, q0, [sp, #32] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w22, wzr, w19, ne
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixtfsi
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    csel w19, w20, w0, lt
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w19, w21, w19, gt
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    bl __unordtf2
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel w8, wzr, w19, ne
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w22
+; CHECK-SD-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixtfsi
+; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    csel w19, w20, w0, lt
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w19, w21, w19, gt
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    bl __unordtf2
+; CHECK-SD-NEXT:    ldp q1, q0, [sp, #32] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w8, wzr, w19, ne
+; CHECK-SD-NEXT:    mov v0.s[2], w8
+; CHECK-SD-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixtfsi
+; CHECK-SD-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    csel w19, w20, w0, lt
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w19, w21, w19, gt
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    bl __unordtf2
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csel w8, wzr, w19, ne
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #128] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #112] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v0.s[3], w8
+; CHECK-SD-NEXT:    add sp, sp, #144
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v4f128_v4i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #176
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #112] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #128] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 176
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w21, -24
+; CHECK-GI-NEXT:    .cfi_offset w22, -32
+; CHECK-GI-NEXT:    .cfi_offset w30, -48
+; CHECK-GI-NEXT:    .cfi_offset b8, -56
+; CHECK-GI-NEXT:    .cfi_offset b9, -64
+; CHECK-GI-NEXT:    .cfi_offset b10, -72
+; CHECK-GI-NEXT:    .cfi_offset b11, -80
+; CHECK-GI-NEXT:    adrp x8, .LCPI17_1
+; CHECK-GI-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI17_1]
+; CHECK-GI-NEXT:    str q2, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str q3, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str q1, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d0, v2.d[1]
+; CHECK-GI-NEXT:    mov d8, v1.d[1]
+; CHECK-GI-NEXT:    fcsel d10, d2, d1, lt
+; CHECK-GI-NEXT:    fmov x8, d10
+; CHECK-GI-NEXT:    fcsel d11, d0, d8, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d11
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    adrp x8, .LCPI17_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI17_0]
+; CHECK-GI-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d9, v0.d[1]
+; CHECK-GI-NEXT:    fcsel d0, d10, d0, gt
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fcsel d1, d11, d9, gt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixtfsi
+; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w19, w0
+; CHECK-GI-NEXT:    mov v1.16b, v0.16b
+; CHECK-GI-NEXT:    bl __unordtf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    csel w20, wzr, w19, ne
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldp q1, q4, [sp, #64] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    ldr q2, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov d0, v2.d[1]
+; CHECK-GI-NEXT:    fcsel d10, d2, d4, lt
+; CHECK-GI-NEXT:    fmov x8, d10
+; CHECK-GI-NEXT:    fcsel d11, d0, d8, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d11
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    fcsel d1, d11, d9, gt
+; CHECK-GI-NEXT:    fcsel d0, d10, d0, gt
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixtfsi
+; CHECK-GI-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w19, w0
+; CHECK-GI-NEXT:    mov v1.16b, v0.16b
+; CHECK-GI-NEXT:    bl __unordtf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    csel w21, wzr, w19, ne
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q2, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d0, v1.d[1]
+; CHECK-GI-NEXT:    fcsel d10, d1, d2, lt
+; CHECK-GI-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    fmov x8, d10
+; CHECK-GI-NEXT:    fcsel d11, d0, d8, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d11
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    fcsel d1, d11, d9, gt
+; CHECK-GI-NEXT:    fcsel d0, d10, d0, gt
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixtfsi
+; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w19, w0
+; CHECK-GI-NEXT:    mov v1.16b, v0.16b
+; CHECK-GI-NEXT:    bl __unordtf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    csel w22, wzr, w19, ne
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldp q5, q1, [sp, #48] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    ldr q2, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov d0, v5.d[1]
+; CHECK-GI-NEXT:    fcsel d10, d5, d2, lt
+; CHECK-GI-NEXT:    fmov x8, d10
+; CHECK-GI-NEXT:    fcsel d8, d0, d8, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d8
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    fcsel d1, d8, d9, gt
+; CHECK-GI-NEXT:    fcsel d0, d10, d0, gt
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixtfsi
+; CHECK-GI-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w19, w0
+; CHECK-GI-NEXT:    mov v1.16b, v0.16b
+; CHECK-GI-NEXT:    bl __unordtf2
+; CHECK-GI-NEXT:    mov v0.s[0], w20
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    ldr x30, [sp, #128] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    csel w8, wzr, w19, ne
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #160] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #112] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #96] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v0.s[1], w21
+; CHECK-GI-NEXT:    mov v0.s[2], w22
+; CHECK-GI-NEXT:    ldp x22, x21, [sp, #144] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v0.s[3], w8
+; CHECK-GI-NEXT:    add sp, sp, #176
+; CHECK-GI-NEXT:    ret
     %x = call <4 x i32> @llvm.fptosi.sat.v4f128.v4i32(<4 x fp128> %f)
     ret <4 x i32> %x
 }
@@ -565,29 +1229,53 @@ declare <7 x i32> @llvm.fptosi.sat.v7f16.v7i32 (<7 x half>)
 declare <8 x i32> @llvm.fptosi.sat.v8f16.v8i32 (<8 x half>)
 
 define <1 x i32> @test_signed_v1f16_v1i32(<1 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v1f16_v1i32:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzs w8, s0
-; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_v1f16_v1i32:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs w8, h0
-; CHECK-FP16-NEXT:    fmov s0, w8
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_v1f16_v1i32:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvtzs w8, s0
+; CHECK-SD-CVT-NEXT:    fmov s0, w8
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_v1f16_v1i32:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs w8, h0
+; CHECK-SD-FP16-NEXT:    fmov s0, w8
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v1f16_v1i32:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzs w8, s0
+; CHECK-GI-CVT-NEXT:    mov v0.s[0], w8
+; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v1f16_v1i32:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs w8, h0
+; CHECK-GI-FP16-NEXT:    mov v0.s[0], w8
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <1 x i32> @llvm.fptosi.sat.v1f16.v1i32(<1 x half> %f)
     ret <1 x i32> %x
 }
 
 define <2 x i32> @test_signed_v2f16_v2i32(<2 x half> %f) {
-; CHECK-LABEL: test_signed_v2f16_v2i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f16_v2i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f16_v2i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i32> @llvm.fptosi.sat.v2f16.v2i32(<2 x half> %f)
     ret <2 x i32> %x
 }
@@ -613,67 +1301,135 @@ define <4 x i32> @test_signed_v4f16_v4i32(<4 x half> %f) {
 }
 
 define <5 x i32> @test_signed_v5f16_v5i32(<5 x half> %f) {
-; CHECK-LABEL: test_signed_v5f16_v5i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
-; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    mov w1, v1.s[1]
-; CHECK-NEXT:    mov w2, v1.s[2]
-; CHECK-NEXT:    mov w3, v1.s[3]
-; CHECK-NEXT:    fmov w0, s1
-; CHECK-NEXT:    fmov w4, s0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v5f16_v5i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    mov w1, v1.s[1]
+; CHECK-SD-NEXT:    mov w2, v1.s[2]
+; CHECK-SD-NEXT:    mov w3, v1.s[3]
+; CHECK-SD-NEXT:    fmov w0, s1
+; CHECK-SD-NEXT:    fmov w4, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v5f16_v5i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-GI-NEXT:    mov v0.h[0], v0.h[4]
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NEXT:    mov s2, v1.s[1]
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    mov s3, v1.s[2]
+; CHECK-GI-NEXT:    mov s4, v1.s[3]
+; CHECK-GI-NEXT:    fmov w0, s1
+; CHECK-GI-NEXT:    fmov w1, s2
+; CHECK-GI-NEXT:    fmov w2, s3
+; CHECK-GI-NEXT:    fmov w4, s0
+; CHECK-GI-NEXT:    fmov w3, s4
+; CHECK-GI-NEXT:    ret
     %x = call <5 x i32> @llvm.fptosi.sat.v5f16.v5i32(<5 x half> %f)
     ret <5 x i32> %x
 }
 
 define <6 x i32> @test_signed_v6f16_v6i32(<6 x half> %f) {
-; CHECK-LABEL: test_signed_v6f16_v6i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
-; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    mov w1, v1.s[1]
-; CHECK-NEXT:    mov w2, v1.s[2]
-; CHECK-NEXT:    mov w5, v0.s[1]
-; CHECK-NEXT:    mov w3, v1.s[3]
-; CHECK-NEXT:    fmov w4, s0
-; CHECK-NEXT:    fmov w0, s1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v6f16_v6i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    mov w1, v1.s[1]
+; CHECK-SD-NEXT:    mov w2, v1.s[2]
+; CHECK-SD-NEXT:    mov w5, v0.s[1]
+; CHECK-SD-NEXT:    mov w3, v1.s[3]
+; CHECK-SD-NEXT:    fmov w4, s0
+; CHECK-SD-NEXT:    fmov w0, s1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v6f16_v6i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov v1.h[0], v0.h[4]
+; CHECK-GI-NEXT:    mov v1.h[1], v0.h[5]
+; CHECK-GI-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NEXT:    fcvtl v1.4s, v1.4h
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-GI-NEXT:    mov s2, v0.s[1]
+; CHECK-GI-NEXT:    mov s3, v0.s[2]
+; CHECK-GI-NEXT:    mov s4, v0.s[3]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    mov s5, v1.s[1]
+; CHECK-GI-NEXT:    fmov w1, s2
+; CHECK-GI-NEXT:    fmov w2, s3
+; CHECK-GI-NEXT:    fmov w3, s4
+; CHECK-GI-NEXT:    fmov w4, s1
+; CHECK-GI-NEXT:    fmov w5, s5
+; CHECK-GI-NEXT:    ret
     %x = call <6 x i32> @llvm.fptosi.sat.v6f16.v6i32(<6 x half> %f)
     ret <6 x i32> %x
 }
 
 define <7 x i32> @test_signed_v7f16_v7i32(<7 x half> %f) {
-; CHECK-LABEL: test_signed_v7f16_v7i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
-; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    mov w1, v1.s[1]
-; CHECK-NEXT:    mov w2, v1.s[2]
-; CHECK-NEXT:    mov w3, v1.s[3]
-; CHECK-NEXT:    mov w5, v0.s[1]
-; CHECK-NEXT:    mov w6, v0.s[2]
-; CHECK-NEXT:    fmov w0, s1
-; CHECK-NEXT:    fmov w4, s0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v7f16_v7i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    mov w1, v1.s[1]
+; CHECK-SD-NEXT:    mov w2, v1.s[2]
+; CHECK-SD-NEXT:    mov w3, v1.s[3]
+; CHECK-SD-NEXT:    mov w5, v0.s[1]
+; CHECK-SD-NEXT:    mov w6, v0.s[2]
+; CHECK-SD-NEXT:    fmov w0, s1
+; CHECK-SD-NEXT:    fmov w4, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v7f16_v7i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov v1.h[0], v0.h[4]
+; CHECK-GI-NEXT:    mov v1.h[1], v0.h[5]
+; CHECK-GI-NEXT:    mov v1.h[2], v0.h[6]
+; CHECK-GI-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NEXT:    fcvtl v1.4s, v1.4h
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-GI-NEXT:    mov s2, v0.s[1]
+; CHECK-GI-NEXT:    mov s3, v0.s[2]
+; CHECK-GI-NEXT:    mov s4, v0.s[3]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    mov s5, v1.s[1]
+; CHECK-GI-NEXT:    mov s6, v1.s[2]
+; CHECK-GI-NEXT:    fmov w1, s2
+; CHECK-GI-NEXT:    fmov w2, s3
+; CHECK-GI-NEXT:    fmov w3, s4
+; CHECK-GI-NEXT:    fmov w4, s1
+; CHECK-GI-NEXT:    fmov w5, s5
+; CHECK-GI-NEXT:    fmov w6, s6
+; CHECK-GI-NEXT:    ret
     %x = call <7 x i32> @llvm.fptosi.sat.v7f16.v7i32(<7 x half> %f)
     ret <7 x i32> %x
 }
 
 define <8 x i32> @test_signed_v8f16_v8i32(<8 x half> %f) {
-; CHECK-LABEL: test_signed_v8f16_v8i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtl2 v1.4s, v0.8h
-; CHECK-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v8f16_v8i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtl2 v1.4s, v0.8h
+; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v8f16_v8i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-GI-NEXT:    fcvtl2 v2.4s, v0.8h
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v2.4s
+; CHECK-GI-NEXT:    ret
     %x = call <8 x i32> @llvm.fptosi.sat.v8f16.v8i32(<8 x half> %f)
     ret <8 x i32> %x
 }
@@ -693,66 +1449,111 @@ declare <2 x i100> @llvm.fptosi.sat.v2f32.v2i100(<2 x float>)
 declare <2 x i128> @llvm.fptosi.sat.v2f32.v2i128(<2 x float>)
 
 define <2 x i1> @test_signed_v2f32_v2i1(<2 x float> %f) {
-; CHECK-LABEL: test_signed_v2f32_v2i1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
-; CHECK-NEXT:    movi v2.2d, #0xffffffffffffffff
-; CHECK-NEXT:    smin v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    smax v0.2s, v0.2s, v2.2s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f32_v2i1:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-SD-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-SD-NEXT:    movi v2.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT:    smin v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT:    smax v0.2s, v0.2s, v2.2s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f32_v2i1:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-GI-NEXT:    movi d2, #0xffffffffffffffff
+; CHECK-GI-NEXT:    smin v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT:    smax v0.2s, v0.2s, v2.2s
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i1> @llvm.fptosi.sat.v2f32.v2i1(<2 x float> %f)
     ret <2 x i1> %x
 }
 
 define <2 x i8> @test_signed_v2f32_v2i8(<2 x float> %f) {
-; CHECK-LABEL: test_signed_v2f32_v2i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.2s, #127
-; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
-; CHECK-NEXT:    smin v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    mvni v1.2s, #127
-; CHECK-NEXT:    smax v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f32_v2i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.2s, #127
+; CHECK-SD-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-SD-NEXT:    smin v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT:    mvni v1.2s, #127
+; CHECK-SD-NEXT:    smax v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f32_v2i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2s, #127
+; CHECK-GI-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-GI-NEXT:    mvni v2.2s, #127
+; CHECK-GI-NEXT:    smin v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT:    smax v0.2s, v0.2s, v2.2s
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i8> @llvm.fptosi.sat.v2f32.v2i8(<2 x float> %f)
     ret <2 x i8> %x
 }
 
 define <2 x i13> @test_signed_v2f32_v2i13(<2 x float> %f) {
-; CHECK-LABEL: test_signed_v2f32_v2i13:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.2s, #15, msl #8
-; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
-; CHECK-NEXT:    smin v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    mvni v1.2s, #15, msl #8
-; CHECK-NEXT:    smax v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f32_v2i13:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.2s, #15, msl #8
+; CHECK-SD-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-SD-NEXT:    smin v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT:    mvni v1.2s, #15, msl #8
+; CHECK-SD-NEXT:    smax v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f32_v2i13:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2s, #15, msl #8
+; CHECK-GI-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-GI-NEXT:    mvni v2.2s, #15, msl #8
+; CHECK-GI-NEXT:    smin v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT:    smax v0.2s, v0.2s, v2.2s
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i13> @llvm.fptosi.sat.v2f32.v2i13(<2 x float> %f)
     ret <2 x i13> %x
 }
 
 define <2 x i16> @test_signed_v2f32_v2i16(<2 x float> %f) {
-; CHECK-LABEL: test_signed_v2f32_v2i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.2s, #127, msl #8
-; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
-; CHECK-NEXT:    smin v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    mvni v1.2s, #127, msl #8
-; CHECK-NEXT:    smax v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f32_v2i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.2s, #127, msl #8
+; CHECK-SD-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-SD-NEXT:    smin v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT:    mvni v1.2s, #127, msl #8
+; CHECK-SD-NEXT:    smax v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f32_v2i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2s, #127, msl #8
+; CHECK-GI-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-GI-NEXT:    mvni v2.2s, #127, msl #8
+; CHECK-GI-NEXT:    smin v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT:    smax v0.2s, v0.2s, v2.2s
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i16> @llvm.fptosi.sat.v2f32.v2i16(<2 x float> %f)
     ret <2 x i16> %x
 }
 
 define <2 x i19> @test_signed_v2f32_v2i19(<2 x float> %f) {
-; CHECK-LABEL: test_signed_v2f32_v2i19:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.2s, #3, msl #16
-; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
-; CHECK-NEXT:    smin v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    mvni v1.2s, #3, msl #16
-; CHECK-NEXT:    smax v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f32_v2i19:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.2s, #3, msl #16
+; CHECK-SD-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-SD-NEXT:    smin v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT:    mvni v1.2s, #3, msl #16
+; CHECK-SD-NEXT:    smax v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f32_v2i19:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2s, #3, msl #16
+; CHECK-GI-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-GI-NEXT:    mvni v2.2s, #3, msl #16
+; CHECK-GI-NEXT:    smin v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT:    smax v0.2s, v0.2s, v2.2s
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i19> @llvm.fptosi.sat.v2f32.v2i19(<2 x float> %f)
     ret <2 x i19> %x
 }
@@ -767,25 +1568,39 @@ define <2 x i32> @test_signed_v2f32_v2i32_duplicate(<2 x float> %f) {
 }
 
 define <2 x i50> @test_signed_v2f32_v2i50(<2 x float> %f) {
-; CHECK-LABEL: test_signed_v2f32_v2i50:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s1, v0.s[1]
-; CHECK-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
-; CHECK-NEXT:    fcvtzs x10, s0
-; CHECK-NEXT:    mov x11, #-562949953421312 // =0xfffe000000000000
-; CHECK-NEXT:    fcvtzs x9, s1
-; CHECK-NEXT:    cmp x9, x8
-; CHECK-NEXT:    csel x9, x9, x8, lt
-; CHECK-NEXT:    cmp x9, x11
-; CHECK-NEXT:    csel x9, x9, x11, gt
-; CHECK-NEXT:    cmp x10, x8
-; CHECK-NEXT:    csel x8, x10, x8, lt
-; CHECK-NEXT:    cmp x8, x11
-; CHECK-NEXT:    csel x8, x8, x11, gt
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov v0.d[1], x9
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f32_v2i50:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    mov s1, v0.s[1]
+; CHECK-SD-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
+; CHECK-SD-NEXT:    fcvtzs x10, s0
+; CHECK-SD-NEXT:    mov x11, #-562949953421312 // =0xfffe000000000000
+; CHECK-SD-NEXT:    fcvtzs x9, s1
+; CHECK-SD-NEXT:    cmp x9, x8
+; CHECK-SD-NEXT:    csel x9, x9, x8, lt
+; CHECK-SD-NEXT:    cmp x9, x11
+; CHECK-SD-NEXT:    csel x9, x9, x11, gt
+; CHECK-SD-NEXT:    cmp x10, x8
+; CHECK-SD-NEXT:    csel x8, x10, x8, lt
+; CHECK-SD-NEXT:    cmp x8, x11
+; CHECK-SD-NEXT:    csel x8, x8, x11, gt
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    mov v0.d[1], x9
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f32_v2i50:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-GI-NEXT:    adrp x8, .LCPI32_1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI32_1]
+; CHECK-GI-NEXT:    adrp x8, .LCPI32_0
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    cmgt v2.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI32_0]
+; CHECK-GI-NEXT:    cmgt v2.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i50> @llvm.fptosi.sat.v2f32.v2i50(<2 x float> %f)
     ret <2 x i50> %x
 }
@@ -801,125 +1616,241 @@ define <2 x i64> @test_signed_v2f32_v2i64(<2 x float> %f) {
 }
 
 define <2 x i100> @test_signed_v2f32_v2i100(<2 x float> %f) {
-; CHECK-LABEL: test_signed_v2f32_v2i100:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #80
-; CHECK-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w22, -32
-; CHECK-NEXT:    .cfi_offset w30, -40
-; CHECK-NEXT:    .cfi_offset b8, -48
-; CHECK-NEXT:    .cfi_offset b9, -56
-; CHECK-NEXT:    .cfi_offset b10, -64
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    movi v9.2s, #241, lsl #24
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w8, #1895825407 // =0x70ffffff
-; CHECK-NEXT:    fmov s10, w8
-; CHECK-NEXT:    mov x21, #-34359738368 // =0xfffffff800000000
-; CHECK-NEXT:    mov x22, #34359738367 // =0x7ffffffff
-; CHECK-NEXT:    mov s8, v0.s[1]
-; CHECK-NEXT:    fcmp s0, s9
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x21, x1, lt
-; CHECK-NEXT:    fcmp s0, s10
-; CHECK-NEXT:    csel x9, x22, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s0, s0
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    csel x19, xzr, x8, vs
-; CHECK-NEXT:    csel x20, xzr, x9, vs
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x21, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    mov x0, x19
-; CHECK-NEXT:    mov x1, x20
-; CHECK-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x9, x22, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x2, xzr, x8, vs
-; CHECK-NEXT:    csel x3, xzr, x9, vs
-; CHECK-NEXT:    add sp, sp, #80
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f32_v2i100:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #80
+; CHECK-SD-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w30, -40
+; CHECK-SD-NEXT:    .cfi_offset b8, -48
+; CHECK-SD-NEXT:    .cfi_offset b9, -56
+; CHECK-SD-NEXT:    .cfi_offset b10, -64
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    movi v9.2s, #241, lsl #24
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w8, #1895825407 // =0x70ffffff
+; CHECK-SD-NEXT:    fmov s10, w8
+; CHECK-SD-NEXT:    mov x21, #-34359738368 // =0xfffffff800000000
+; CHECK-SD-NEXT:    mov x22, #34359738367 // =0x7ffffffff
+; CHECK-SD-NEXT:    mov s8, v0.s[1]
+; CHECK-SD-NEXT:    fcmp s0, s9
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x21, x1, lt
+; CHECK-SD-NEXT:    fcmp s0, s10
+; CHECK-SD-NEXT:    csel x9, x22, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s0, s0
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    csel x19, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x20, xzr, x9, vs
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x21, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
+; CHECK-SD-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x9, x22, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x2, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x3, xzr, x9, vs
+; CHECK-SD-NEXT:    add sp, sp, #80
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f32_v2i100:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #80
+; CHECK-GI-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w21, -24
+; CHECK-GI-NEXT:    .cfi_offset w22, -32
+; CHECK-GI-NEXT:    .cfi_offset w30, -40
+; CHECK-GI-NEXT:    .cfi_offset b8, -48
+; CHECK-GI-NEXT:    .cfi_offset b9, -56
+; CHECK-GI-NEXT:    .cfi_offset b10, -64
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT:    bl __fixsfti
+; CHECK-GI-NEXT:    movi v9.2s, #241, lsl #24
+; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w8, #1895825407 // =0x70ffffff
+; CHECK-GI-NEXT:    fmov s10, w8
+; CHECK-GI-NEXT:    mov x21, #34359738368 // =0x800000000
+; CHECK-GI-NEXT:    mov x22, #34359738367 // =0x7ffffffff
+; CHECK-GI-NEXT:    fcmp s0, s9
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, x21, x1, lt
+; CHECK-GI-NEXT:    fcmp s0, s10
+; CHECK-GI-NEXT:    csinv x8, x8, xzr, le
+; CHECK-GI-NEXT:    csel x9, x22, x9, gt
+; CHECK-GI-NEXT:    fcmp s0, s0
+; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    csel x19, xzr, x8, vs
+; CHECK-GI-NEXT:    csel x20, xzr, x9, vs
+; CHECK-GI-NEXT:    bl __fixsfti
+; CHECK-GI-NEXT:    fcmp s8, s9
+; CHECK-GI-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, x21, x1, lt
+; CHECK-GI-NEXT:    fcmp s8, s10
+; CHECK-GI-NEXT:    mov x0, x19
+; CHECK-GI-NEXT:    mov x1, x20
+; CHECK-GI-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csinv x8, x8, xzr, le
+; CHECK-GI-NEXT:    csel x9, x22, x9, gt
+; CHECK-GI-NEXT:    fcmp s8, s8
+; CHECK-GI-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csel x2, xzr, x8, vs
+; CHECK-GI-NEXT:    csel x3, xzr, x9, vs
+; CHECK-GI-NEXT:    add sp, sp, #80
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i100> @llvm.fptosi.sat.v2f32.v2i100(<2 x float> %f)
     ret <2 x i100> %x
 }
 
 define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) {
-; CHECK-LABEL: test_signed_v2f32_v2i128:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #80
-; CHECK-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w22, -32
-; CHECK-NEXT:    .cfi_offset w30, -40
-; CHECK-NEXT:    .cfi_offset b8, -48
-; CHECK-NEXT:    .cfi_offset b9, -56
-; CHECK-NEXT:    .cfi_offset b10, -64
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    movi v9.2s, #255, lsl #24
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w8, #2130706431 // =0x7effffff
-; CHECK-NEXT:    fmov s10, w8
-; CHECK-NEXT:    mov x21, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT:    mov x22, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT:    mov s8, v0.s[1]
-; CHECK-NEXT:    fcmp s0, s9
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x21, x1, lt
-; CHECK-NEXT:    fcmp s0, s10
-; CHECK-NEXT:    csel x9, x22, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s0, s0
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    csel x19, xzr, x8, vs
-; CHECK-NEXT:    csel x20, xzr, x9, vs
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x21, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    mov x0, x19
-; CHECK-NEXT:    mov x1, x20
-; CHECK-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x9, x22, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x2, xzr, x8, vs
-; CHECK-NEXT:    csel x3, xzr, x9, vs
-; CHECK-NEXT:    add sp, sp, #80
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f32_v2i128:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #80
+; CHECK-SD-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w30, -40
+; CHECK-SD-NEXT:    .cfi_offset b8, -48
+; CHECK-SD-NEXT:    .cfi_offset b9, -56
+; CHECK-SD-NEXT:    .cfi_offset b10, -64
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    movi v9.2s, #255, lsl #24
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w8, #2130706431 // =0x7effffff
+; CHECK-SD-NEXT:    fmov s10, w8
+; CHECK-SD-NEXT:    mov x21, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT:    mov x22, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT:    mov s8, v0.s[1]
+; CHECK-SD-NEXT:    fcmp s0, s9
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x21, x1, lt
+; CHECK-SD-NEXT:    fcmp s0, s10
+; CHECK-SD-NEXT:    csel x9, x22, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s0, s0
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    csel x19, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x20, xzr, x9, vs
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x21, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
+; CHECK-SD-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x9, x22, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x2, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x3, xzr, x9, vs
+; CHECK-SD-NEXT:    add sp, sp, #80
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f32_v2i128:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #80
+; CHECK-GI-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w21, -24
+; CHECK-GI-NEXT:    .cfi_offset w22, -32
+; CHECK-GI-NEXT:    .cfi_offset w30, -40
+; CHECK-GI-NEXT:    .cfi_offset b8, -48
+; CHECK-GI-NEXT:    .cfi_offset b9, -56
+; CHECK-GI-NEXT:    .cfi_offset b10, -64
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT:    bl __fixsfti
+; CHECK-GI-NEXT:    movi v9.2s, #255, lsl #24
+; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w8, #2130706431 // =0x7effffff
+; CHECK-GI-NEXT:    fmov s10, w8
+; CHECK-GI-NEXT:    mov x21, #-9223372036854775808 // =0x8000000000000000
+; CHECK-GI-NEXT:    mov x22, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-GI-NEXT:    fcmp s0, s9
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, x21, x1, lt
+; CHECK-GI-NEXT:    fcmp s0, s10
+; CHECK-GI-NEXT:    csinv x8, x8, xzr, le
+; CHECK-GI-NEXT:    csel x9, x22, x9, gt
+; CHECK-GI-NEXT:    fcmp s0, s0
+; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    csel x19, xzr, x8, vs
+; CHECK-GI-NEXT:    csel x20, xzr, x9, vs
+; CHECK-GI-NEXT:    bl __fixsfti
+; CHECK-GI-NEXT:    fcmp s8, s9
+; CHECK-GI-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, x21, x1, lt
+; CHECK-GI-NEXT:    fcmp s8, s10
+; CHECK-GI-NEXT:    mov x0, x19
+; CHECK-GI-NEXT:    mov x1, x20
+; CHECK-GI-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csinv x8, x8, xzr, le
+; CHECK-GI-NEXT:    csel x9, x22, x9, gt
+; CHECK-GI-NEXT:    fcmp s8, s8
+; CHECK-GI-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csel x2, xzr, x8, vs
+; CHECK-GI-NEXT:    csel x3, xzr, x9, vs
+; CHECK-GI-NEXT:    add sp, sp, #80
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i128> @llvm.fptosi.sat.v2f32.v2i128(<2 x float> %f)
     ret <2 x i128> %x
 }
@@ -939,15 +1870,25 @@ declare <4 x i100> @llvm.fptosi.sat.v4f32.v4i100(<4 x float>)
 declare <4 x i128> @llvm.fptosi.sat.v4f32.v4i128(<4 x float>)
 
 define <4 x i1> @test_signed_v4f32_v4i1(<4 x float> %f) {
-; CHECK-LABEL: test_signed_v4f32_v4i1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    movi v1.2d, #0xffffffffffffffff
-; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v4f32_v4i1:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    movi v1.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v4f32_v4i1:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    movi v2.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    smax v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    ret
     %x = call <4 x i1> @llvm.fptosi.sat.v4f32.v4i1(<4 x float> %f)
     ret <4 x i1> %x
 }
@@ -981,11 +1922,21 @@ define <4 x i13> @test_signed_v4f32_v4i13(<4 x float> %f) {
 }
 
 define <4 x i16> @test_signed_v4f32_v4i16(<4 x float> %f) {
-; CHECK-LABEL: test_signed_v4f32_v4i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    sqxtn v0.4h, v0.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v4f32_v4i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    sqxtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v4f32_v4i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.4s, #127, msl #8
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    mvni v1.4s, #127, msl #8
+; CHECK-GI-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    ret
     %x = call <4 x i16> @llvm.fptosi.sat.v4f32.v4i16(<4 x float> %f)
     ret <4 x i16> %x
 }
@@ -1046,213 +1997,415 @@ define <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) {
 }
 
 define <4 x i64> @test_signed_v4f32_v4i64(<4 x float> %f) {
-; CHECK-LABEL: test_signed_v4f32_v4i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtl2 v1.2d, v0.4s
-; CHECK-NEXT:    fcvtl v0.2d, v0.2s
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v4f32_v4i64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtl2 v1.2d, v0.4s
+; CHECK-SD-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v4f32_v4i64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtl v1.2d, v0.2s
+; CHECK-GI-NEXT:    fcvtl2 v2.2d, v0.4s
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v2.2d
+; CHECK-GI-NEXT:    ret
     %x = call <4 x i64> @llvm.fptosi.sat.v4f32.v4i64(<4 x float> %f)
     ret <4 x i64> %x
 }
 
 define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
-; CHECK-LABEL: test_signed_v4f32_v4i100:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #112
-; CHECK-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x26, x25, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w22, -32
-; CHECK-NEXT:    .cfi_offset w23, -40
-; CHECK-NEXT:    .cfi_offset w24, -48
-; CHECK-NEXT:    .cfi_offset w25, -56
-; CHECK-NEXT:    .cfi_offset w26, -64
-; CHECK-NEXT:    .cfi_offset w30, -72
-; CHECK-NEXT:    .cfi_offset b8, -80
-; CHECK-NEXT:    .cfi_offset b9, -88
-; CHECK-NEXT:    .cfi_offset b10, -96
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    movi v9.2s, #241, lsl #24
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w8, #1895825407 // =0x70ffffff
-; CHECK-NEXT:    fmov s10, w8
-; CHECK-NEXT:    mov x25, #-34359738368 // =0xfffffff800000000
-; CHECK-NEXT:    mov x26, #34359738367 // =0x7ffffffff
-; CHECK-NEXT:    mov s8, v0.s[1]
-; CHECK-NEXT:    fcmp s0, s9
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x25, x1, lt
-; CHECK-NEXT:    fcmp s0, s10
-; CHECK-NEXT:    csel x9, x26, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s0, s0
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    csel x19, xzr, x8, vs
-; CHECK-NEXT:    csel x20, xzr, x9, vs
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x25, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    csel x9, x26, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    csel x21, xzr, x8, vs
-; CHECK-NEXT:    csel x22, xzr, x9, vs
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    fcmp s0, s9
-; CHECK-NEXT:    mov s8, v0.s[1]
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x25, x1, lt
-; CHECK-NEXT:    fcmp s0, s10
-; CHECK-NEXT:    csel x9, x26, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s0, s0
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    csel x23, xzr, x8, vs
-; CHECK-NEXT:    csel x24, xzr, x9, vs
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    mov x2, x21
-; CHECK-NEXT:    mov x3, x22
-; CHECK-NEXT:    mov x4, x23
-; CHECK-NEXT:    mov x5, x24
-; CHECK-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x25, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    mov x0, x19
-; CHECK-NEXT:    mov x1, x20
-; CHECK-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x9, x26, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x26, x25, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x6, xzr, x8, vs
-; CHECK-NEXT:    csel x7, xzr, x9, vs
-; CHECK-NEXT:    add sp, sp, #112
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v4f32_v4i100:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #112
+; CHECK-SD-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x26, x25, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w23, -40
+; CHECK-SD-NEXT:    .cfi_offset w24, -48
+; CHECK-SD-NEXT:    .cfi_offset w25, -56
+; CHECK-SD-NEXT:    .cfi_offset w26, -64
+; CHECK-SD-NEXT:    .cfi_offset w30, -72
+; CHECK-SD-NEXT:    .cfi_offset b8, -80
+; CHECK-SD-NEXT:    .cfi_offset b9, -88
+; CHECK-SD-NEXT:    .cfi_offset b10, -96
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    movi v9.2s, #241, lsl #24
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w8, #1895825407 // =0x70ffffff
+; CHECK-SD-NEXT:    fmov s10, w8
+; CHECK-SD-NEXT:    mov x25, #-34359738368 // =0xfffffff800000000
+; CHECK-SD-NEXT:    mov x26, #34359738367 // =0x7ffffffff
+; CHECK-SD-NEXT:    mov s8, v0.s[1]
+; CHECK-SD-NEXT:    fcmp s0, s9
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x25, x1, lt
+; CHECK-SD-NEXT:    fcmp s0, s10
+; CHECK-SD-NEXT:    csel x9, x26, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s0, s0
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    csel x19, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x20, xzr, x9, vs
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x25, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    csel x9, x26, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    csel x21, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x22, xzr, x9, vs
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s0, s9
+; CHECK-SD-NEXT:    mov s8, v0.s[1]
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x25, x1, lt
+; CHECK-SD-NEXT:    fcmp s0, s10
+; CHECK-SD-NEXT:    csel x9, x26, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s0, s0
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    csel x23, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x24, xzr, x9, vs
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    mov x2, x21
+; CHECK-SD-NEXT:    mov x3, x22
+; CHECK-SD-NEXT:    mov x4, x23
+; CHECK-SD-NEXT:    mov x5, x24
+; CHECK-SD-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x25, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
+; CHECK-SD-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x9, x26, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x26, x25, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x6, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x7, xzr, x9, vs
+; CHECK-SD-NEXT:    add sp, sp, #112
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v4f32_v4i100:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #128
+; CHECK-GI-NEXT:    str d12, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #24] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #40] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #56] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp x26, x25, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x24, x23, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w21, -24
+; CHECK-GI-NEXT:    .cfi_offset w22, -32
+; CHECK-GI-NEXT:    .cfi_offset w23, -40
+; CHECK-GI-NEXT:    .cfi_offset w24, -48
+; CHECK-GI-NEXT:    .cfi_offset w25, -56
+; CHECK-GI-NEXT:    .cfi_offset w26, -64
+; CHECK-GI-NEXT:    .cfi_offset w30, -72
+; CHECK-GI-NEXT:    .cfi_offset b8, -80
+; CHECK-GI-NEXT:    .cfi_offset b9, -88
+; CHECK-GI-NEXT:    .cfi_offset b10, -96
+; CHECK-GI-NEXT:    .cfi_offset b11, -104
+; CHECK-GI-NEXT:    .cfi_offset b12, -112
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    mov s9, v0.s[1]
+; CHECK-GI-NEXT:    mov s10, v0.s[2]
+; CHECK-GI-NEXT:    mov s8, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT:    bl __fixsfti
+; CHECK-GI-NEXT:    movi v11.2s, #241, lsl #24
+; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w8, #1895825407 // =0x70ffffff
+; CHECK-GI-NEXT:    fmov s12, w8
+; CHECK-GI-NEXT:    mov x25, #34359738368 // =0x800000000
+; CHECK-GI-NEXT:    mov x26, #34359738367 // =0x7ffffffff
+; CHECK-GI-NEXT:    fcmp s0, s11
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, x25, x1, lt
+; CHECK-GI-NEXT:    fcmp s0, s12
+; CHECK-GI-NEXT:    csinv x8, x8, xzr, le
+; CHECK-GI-NEXT:    csel x9, x26, x9, gt
+; CHECK-GI-NEXT:    fcmp s0, s0
+; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    csel x19, xzr, x8, vs
+; CHECK-GI-NEXT:    csel x20, xzr, x9, vs
+; CHECK-GI-NEXT:    bl __fixsfti
+; CHECK-GI-NEXT:    fcmp s9, s11
+; CHECK-GI-NEXT:    fmov s0, s10
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, x25, x1, lt
+; CHECK-GI-NEXT:    fcmp s9, s12
+; CHECK-GI-NEXT:    csinv x8, x8, xzr, le
+; CHECK-GI-NEXT:    csel x9, x26, x9, gt
+; CHECK-GI-NEXT:    fcmp s9, s9
+; CHECK-GI-NEXT:    csel x21, xzr, x8, vs
+; CHECK-GI-NEXT:    csel x22, xzr, x9, vs
+; CHECK-GI-NEXT:    bl __fixsfti
+; CHECK-GI-NEXT:    fcmp s10, s11
+; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, x25, x1, lt
+; CHECK-GI-NEXT:    fcmp s10, s12
+; CHECK-GI-NEXT:    csinv x8, x8, xzr, le
+; CHECK-GI-NEXT:    csel x9, x26, x9, gt
+; CHECK-GI-NEXT:    fcmp s10, s10
+; CHECK-GI-NEXT:    csel x23, xzr, x8, vs
+; CHECK-GI-NEXT:    csel x24, xzr, x9, vs
+; CHECK-GI-NEXT:    bl __fixsfti
+; CHECK-GI-NEXT:    fcmp s8, s11
+; CHECK-GI-NEXT:    mov x2, x21
+; CHECK-GI-NEXT:    mov x3, x22
+; CHECK-GI-NEXT:    mov x4, x23
+; CHECK-GI-NEXT:    mov x5, x24
+; CHECK-GI-NEXT:    ldr x30, [sp, #56] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldp x22, x21, [sp, #96] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, x25, x1, lt
+; CHECK-GI-NEXT:    fcmp s8, s12
+; CHECK-GI-NEXT:    mov x0, x19
+; CHECK-GI-NEXT:    mov x1, x20
+; CHECK-GI-NEXT:    ldr d12, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csinv x8, x8, xzr, le
+; CHECK-GI-NEXT:    csel x9, x26, x9, gt
+; CHECK-GI-NEXT:    fcmp s8, s8
+; CHECK-GI-NEXT:    ldp x24, x23, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp x26, x25, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #40] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csel x6, xzr, x8, vs
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #24] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csel x7, xzr, x9, vs
+; CHECK-GI-NEXT:    add sp, sp, #128
+; CHECK-GI-NEXT:    ret
     %x = call <4 x i100> @llvm.fptosi.sat.v4f32.v4i100(<4 x float> %f)
     ret <4 x i100> %x
 }
 
 define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
-; CHECK-LABEL: test_signed_v4f32_v4i128:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #112
-; CHECK-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x26, x25, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w22, -32
-; CHECK-NEXT:    .cfi_offset w23, -40
-; CHECK-NEXT:    .cfi_offset w24, -48
-; CHECK-NEXT:    .cfi_offset w25, -56
-; CHECK-NEXT:    .cfi_offset w26, -64
-; CHECK-NEXT:    .cfi_offset w30, -72
-; CHECK-NEXT:    .cfi_offset b8, -80
-; CHECK-NEXT:    .cfi_offset b9, -88
-; CHECK-NEXT:    .cfi_offset b10, -96
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    movi v9.2s, #255, lsl #24
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w8, #2130706431 // =0x7effffff
-; CHECK-NEXT:    fmov s10, w8
-; CHECK-NEXT:    mov x25, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT:    mov x26, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT:    mov s8, v0.s[1]
-; CHECK-NEXT:    fcmp s0, s9
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x25, x1, lt
-; CHECK-NEXT:    fcmp s0, s10
-; CHECK-NEXT:    csel x9, x26, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s0, s0
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    csel x19, xzr, x8, vs
-; CHECK-NEXT:    csel x20, xzr, x9, vs
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x25, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    csel x9, x26, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    csel x21, xzr, x8, vs
-; CHECK-NEXT:    csel x22, xzr, x9, vs
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    fcmp s0, s9
-; CHECK-NEXT:    mov s8, v0.s[1]
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x25, x1, lt
-; CHECK-NEXT:    fcmp s0, s10
-; CHECK-NEXT:    csel x9, x26, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s0, s0
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    csel x23, xzr, x8, vs
-; CHECK-NEXT:    csel x24, xzr, x9, vs
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    mov x2, x21
-; CHECK-NEXT:    mov x3, x22
-; CHECK-NEXT:    mov x4, x23
-; CHECK-NEXT:    mov x5, x24
-; CHECK-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x25, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    mov x0, x19
-; CHECK-NEXT:    mov x1, x20
-; CHECK-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x9, x26, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x26, x25, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x6, xzr, x8, vs
-; CHECK-NEXT:    csel x7, xzr, x9, vs
-; CHECK-NEXT:    add sp, sp, #112
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v4f32_v4i128:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #112
+; CHECK-SD-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x26, x25, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w23, -40
+; CHECK-SD-NEXT:    .cfi_offset w24, -48
+; CHECK-SD-NEXT:    .cfi_offset w25, -56
+; CHECK-SD-NEXT:    .cfi_offset w26, -64
+; CHECK-SD-NEXT:    .cfi_offset w30, -72
+; CHECK-SD-NEXT:    .cfi_offset b8, -80
+; CHECK-SD-NEXT:    .cfi_offset b9, -88
+; CHECK-SD-NEXT:    .cfi_offset b10, -96
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    movi v9.2s, #255, lsl #24
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w8, #2130706431 // =0x7effffff
+; CHECK-SD-NEXT:    fmov s10, w8
+; CHECK-SD-NEXT:    mov x25, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT:    mov x26, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT:    mov s8, v0.s[1]
+; CHECK-SD-NEXT:    fcmp s0, s9
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x25, x1, lt
+; CHECK-SD-NEXT:    fcmp s0, s10
+; CHECK-SD-NEXT:    csel x9, x26, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s0, s0
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    csel x19, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x20, xzr, x9, vs
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x25, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    csel x9, x26, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    csel x21, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x22, xzr, x9, vs
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s0, s9
+; CHECK-SD-NEXT:    mov s8, v0.s[1]
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x25, x1, lt
+; CHECK-SD-NEXT:    fcmp s0, s10
+; CHECK-SD-NEXT:    csel x9, x26, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s0, s0
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    csel x23, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x24, xzr, x9, vs
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    mov x2, x21
+; CHECK-SD-NEXT:    mov x3, x22
+; CHECK-SD-NEXT:    mov x4, x23
+; CHECK-SD-NEXT:    mov x5, x24
+; CHECK-SD-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x25, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
+; CHECK-SD-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x9, x26, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x26, x25, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x6, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x7, xzr, x9, vs
+; CHECK-SD-NEXT:    add sp, sp, #112
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v4f32_v4i128:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #128
+; CHECK-GI-NEXT:    str d12, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #24] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #40] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #56] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp x26, x25, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x24, x23, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w21, -24
+; CHECK-GI-NEXT:    .cfi_offset w22, -32
+; CHECK-GI-NEXT:    .cfi_offset w23, -40
+; CHECK-GI-NEXT:    .cfi_offset w24, -48
+; CHECK-GI-NEXT:    .cfi_offset w25, -56
+; CHECK-GI-NEXT:    .cfi_offset w26, -64
+; CHECK-GI-NEXT:    .cfi_offset w30, -72
+; CHECK-GI-NEXT:    .cfi_offset b8, -80
+; CHECK-GI-NEXT:    .cfi_offset b9, -88
+; CHECK-GI-NEXT:    .cfi_offset b10, -96
+; CHECK-GI-NEXT:    .cfi_offset b11, -104
+; CHECK-GI-NEXT:    .cfi_offset b12, -112
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    mov s9, v0.s[1]
+; CHECK-GI-NEXT:    mov s10, v0.s[2]
+; CHECK-GI-NEXT:    mov s8, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT:    bl __fixsfti
+; CHECK-GI-NEXT:    movi v11.2s, #255, lsl #24
+; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w8, #2130706431 // =0x7effffff
+; CHECK-GI-NEXT:    fmov s12, w8
+; CHECK-GI-NEXT:    mov x25, #-9223372036854775808 // =0x8000000000000000
+; CHECK-GI-NEXT:    mov x26, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-GI-NEXT:    fcmp s0, s11
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, x25, x1, lt
+; CHECK-GI-NEXT:    fcmp s0, s12
+; CHECK-GI-NEXT:    csinv x8, x8, xzr, le
+; CHECK-GI-NEXT:    csel x9, x26, x9, gt
+; CHECK-GI-NEXT:    fcmp s0, s0
+; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    csel x19, xzr, x8, vs
+; CHECK-GI-NEXT:    csel x20, xzr, x9, vs
+; CHECK-GI-NEXT:    bl __fixsfti
+; CHECK-GI-NEXT:    fcmp s9, s11
+; CHECK-GI-NEXT:    fmov s0, s10
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, x25, x1, lt
+; CHECK-GI-NEXT:    fcmp s9, s12
+; CHECK-GI-NEXT:    csinv x8, x8, xzr, le
+; CHECK-GI-NEXT:    csel x9, x26, x9, gt
+; CHECK-GI-NEXT:    fcmp s9, s9
+; CHECK-GI-NEXT:    csel x21, xzr, x8, vs
+; CHECK-GI-NEXT:    csel x22, xzr, x9, vs
+; CHECK-GI-NEXT:    bl __fixsfti
+; CHECK-GI-NEXT:    fcmp s10, s11
+; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, x25, x1, lt
+; CHECK-GI-NEXT:    fcmp s10, s12
+; CHECK-GI-NEXT:    csinv x8, x8, xzr, le
+; CHECK-GI-NEXT:    csel x9, x26, x9, gt
+; CHECK-GI-NEXT:    fcmp s10, s10
+; CHECK-GI-NEXT:    csel x23, xzr, x8, vs
+; CHECK-GI-NEXT:    csel x24, xzr, x9, vs
+; CHECK-GI-NEXT:    bl __fixsfti
+; CHECK-GI-NEXT:    fcmp s8, s11
+; CHECK-GI-NEXT:    mov x2, x21
+; CHECK-GI-NEXT:    mov x3, x22
+; CHECK-GI-NEXT:    mov x4, x23
+; CHECK-GI-NEXT:    mov x5, x24
+; CHECK-GI-NEXT:    ldr x30, [sp, #56] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldp x22, x21, [sp, #96] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, x25, x1, lt
+; CHECK-GI-NEXT:    fcmp s8, s12
+; CHECK-GI-NEXT:    mov x0, x19
+; CHECK-GI-NEXT:    mov x1, x20
+; CHECK-GI-NEXT:    ldr d12, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csinv x8, x8, xzr, le
+; CHECK-GI-NEXT:    csel x9, x26, x9, gt
+; CHECK-GI-NEXT:    fcmp s8, s8
+; CHECK-GI-NEXT:    ldp x24, x23, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp x26, x25, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #40] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csel x6, xzr, x8, vs
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #24] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csel x7, xzr, x9, vs
+; CHECK-GI-NEXT:    add sp, sp, #128
+; CHECK-GI-NEXT:    ret
     %x = call <4 x i128> @llvm.fptosi.sat.v4f32.v4i128(<4 x float> %f)
     ret <4 x i128> %x
 }
@@ -1272,152 +2425,246 @@ declare <2 x i100> @llvm.fptosi.sat.v2f64.v2i100(<2 x double>)
 declare <2 x i128> @llvm.fptosi.sat.v2f64.v2i128(<2 x double>)
 
 define <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) {
-; CHECK-LABEL: test_signed_v2f64_v2i1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    fcvtzs w9, d0
-; CHECK-NEXT:    fcvtzs w8, d1
-; CHECK-NEXT:    ands w8, w8, w8, asr #31
-; CHECK-NEXT:    csinv w8, w8, wzr, ge
-; CHECK-NEXT:    ands w9, w9, w9, asr #31
-; CHECK-NEXT:    csinv w9, w9, wzr, ge
-; CHECK-NEXT:    fmov s0, w9
-; CHECK-NEXT:    mov v0.s[1], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f64_v2i1:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d1, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzs w9, d0
+; CHECK-SD-NEXT:    fcvtzs w8, d1
+; CHECK-SD-NEXT:    ands w8, w8, w8, asr #31
+; CHECK-SD-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-SD-NEXT:    ands w9, w9, w9, asr #31
+; CHECK-SD-NEXT:    csinv w9, w9, wzr, ge
+; CHECK-SD-NEXT:    fmov s0, w9
+; CHECK-SD-NEXT:    mov v0.s[1], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f64_v2i1:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    movi v2.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT:    cmlt v1.2d, v0.2d, #0
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    cmgt v1.2d, v0.2d, v2.2d
+; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v1.16b
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i1> @llvm.fptosi.sat.v2f64.v2i1(<2 x double> %f)
     ret <2 x i1> %x
 }
 
 define <2 x i8> @test_signed_v2f64_v2i8(<2 x double> %f) {
-; CHECK-LABEL: test_signed_v2f64_v2i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    fcvtzs w10, d0
-; CHECK-NEXT:    mov w8, #127 // =0x7f
-; CHECK-NEXT:    mov w11, #-128 // =0xffffff80
-; CHECK-NEXT:    fcvtzs w9, d1
-; CHECK-NEXT:    cmp w9, #127
-; CHECK-NEXT:    csel w9, w9, w8, lt
-; CHECK-NEXT:    cmn w9, #128
-; CHECK-NEXT:    csel w9, w9, w11, gt
-; CHECK-NEXT:    cmp w10, #127
-; CHECK-NEXT:    csel w8, w10, w8, lt
-; CHECK-NEXT:    cmn w8, #128
-; CHECK-NEXT:    csel w8, w8, w11, gt
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f64_v2i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d1, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzs w10, d0
+; CHECK-SD-NEXT:    mov w8, #127 // =0x7f
+; CHECK-SD-NEXT:    mov w11, #-128 // =0xffffff80
+; CHECK-SD-NEXT:    fcvtzs w9, d1
+; CHECK-SD-NEXT:    cmp w9, #127
+; CHECK-SD-NEXT:    csel w9, w9, w8, lt
+; CHECK-SD-NEXT:    cmn w9, #128
+; CHECK-SD-NEXT:    csel w9, w9, w11, gt
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w8, w10, w8, lt
+; CHECK-SD-NEXT:    cmn w8, #128
+; CHECK-SD-NEXT:    csel w8, w8, w11, gt
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f64_v2i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    adrp x8, .LCPI47_1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI47_1]
+; CHECK-GI-NEXT:    adrp x8, .LCPI47_0
+; CHECK-GI-NEXT:    cmgt v2.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI47_0]
+; CHECK-GI-NEXT:    cmgt v2.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i8> @llvm.fptosi.sat.v2f64.v2i8(<2 x double> %f)
     ret <2 x i8> %x
 }
 
 define <2 x i13> @test_signed_v2f64_v2i13(<2 x double> %f) {
-; CHECK-LABEL: test_signed_v2f64_v2i13:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    fcvtzs w10, d0
-; CHECK-NEXT:    mov w8, #4095 // =0xfff
-; CHECK-NEXT:    mov w11, #-4096 // =0xfffff000
-; CHECK-NEXT:    fcvtzs w9, d1
-; CHECK-NEXT:    cmp w9, #4095
-; CHECK-NEXT:    csel w9, w9, w8, lt
-; CHECK-NEXT:    cmn w9, #1, lsl #12 // =4096
-; CHECK-NEXT:    csel w9, w9, w11, gt
-; CHECK-NEXT:    cmp w10, #4095
-; CHECK-NEXT:    csel w8, w10, w8, lt
-; CHECK-NEXT:    cmn w8, #1, lsl #12 // =4096
-; CHECK-NEXT:    csel w8, w8, w11, gt
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f64_v2i13:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d1, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzs w10, d0
+; CHECK-SD-NEXT:    mov w8, #4095 // =0xfff
+; CHECK-SD-NEXT:    mov w11, #-4096 // =0xfffff000
+; CHECK-SD-NEXT:    fcvtzs w9, d1
+; CHECK-SD-NEXT:    cmp w9, #4095
+; CHECK-SD-NEXT:    csel w9, w9, w8, lt
+; CHECK-SD-NEXT:    cmn w9, #1, lsl #12 // =4096
+; CHECK-SD-NEXT:    csel w9, w9, w11, gt
+; CHECK-SD-NEXT:    cmp w10, #4095
+; CHECK-SD-NEXT:    csel w8, w10, w8, lt
+; CHECK-SD-NEXT:    cmn w8, #1, lsl #12 // =4096
+; CHECK-SD-NEXT:    csel w8, w8, w11, gt
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f64_v2i13:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    adrp x8, .LCPI48_1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI48_1]
+; CHECK-GI-NEXT:    adrp x8, .LCPI48_0
+; CHECK-GI-NEXT:    cmgt v2.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI48_0]
+; CHECK-GI-NEXT:    cmgt v2.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i13> @llvm.fptosi.sat.v2f64.v2i13(<2 x double> %f)
     ret <2 x i13> %x
 }
 
 define <2 x i16> @test_signed_v2f64_v2i16(<2 x double> %f) {
-; CHECK-LABEL: test_signed_v2f64_v2i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    mov w8, #32767 // =0x7fff
-; CHECK-NEXT:    fcvtzs w10, d0
-; CHECK-NEXT:    mov w11, #-32768 // =0xffff8000
-; CHECK-NEXT:    fcvtzs w9, d1
-; CHECK-NEXT:    cmp w9, w8
-; CHECK-NEXT:    csel w9, w9, w8, lt
-; CHECK-NEXT:    cmn w9, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w9, w9, w11, gt
-; CHECK-NEXT:    cmp w10, w8
-; CHECK-NEXT:    csel w8, w10, w8, lt
-; CHECK-NEXT:    cmn w8, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w8, w8, w11, gt
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f64_v2i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d1, v0.d[1]
+; CHECK-SD-NEXT:    mov w8, #32767 // =0x7fff
+; CHECK-SD-NEXT:    fcvtzs w10, d0
+; CHECK-SD-NEXT:    mov w11, #-32768 // =0xffff8000
+; CHECK-SD-NEXT:    fcvtzs w9, d1
+; CHECK-SD-NEXT:    cmp w9, w8
+; CHECK-SD-NEXT:    csel w9, w9, w8, lt
+; CHECK-SD-NEXT:    cmn w9, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w9, w9, w11, gt
+; CHECK-SD-NEXT:    cmp w10, w8
+; CHECK-SD-NEXT:    csel w8, w10, w8, lt
+; CHECK-SD-NEXT:    cmn w8, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w8, w8, w11, gt
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f64_v2i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    adrp x8, .LCPI49_1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI49_1]
+; CHECK-GI-NEXT:    adrp x8, .LCPI49_0
+; CHECK-GI-NEXT:    cmgt v2.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI49_0]
+; CHECK-GI-NEXT:    cmgt v2.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i16> @llvm.fptosi.sat.v2f64.v2i16(<2 x double> %f)
     ret <2 x i16> %x
 }
 
 define <2 x i19> @test_signed_v2f64_v2i19(<2 x double> %f) {
-; CHECK-LABEL: test_signed_v2f64_v2i19:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    mov w8, #262143 // =0x3ffff
-; CHECK-NEXT:    fcvtzs w10, d0
-; CHECK-NEXT:    mov w11, #-262144 // =0xfffc0000
-; CHECK-NEXT:    fcvtzs w9, d1
-; CHECK-NEXT:    cmp w9, w8
-; CHECK-NEXT:    csel w9, w9, w8, lt
-; CHECK-NEXT:    cmn w9, #64, lsl #12 // =262144
-; CHECK-NEXT:    csel w9, w9, w11, gt
-; CHECK-NEXT:    cmp w10, w8
-; CHECK-NEXT:    csel w8, w10, w8, lt
-; CHECK-NEXT:    cmn w8, #64, lsl #12 // =262144
-; CHECK-NEXT:    csel w8, w8, w11, gt
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f64_v2i19:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d1, v0.d[1]
+; CHECK-SD-NEXT:    mov w8, #262143 // =0x3ffff
+; CHECK-SD-NEXT:    fcvtzs w10, d0
+; CHECK-SD-NEXT:    mov w11, #-262144 // =0xfffc0000
+; CHECK-SD-NEXT:    fcvtzs w9, d1
+; CHECK-SD-NEXT:    cmp w9, w8
+; CHECK-SD-NEXT:    csel w9, w9, w8, lt
+; CHECK-SD-NEXT:    cmn w9, #64, lsl #12 // =262144
+; CHECK-SD-NEXT:    csel w9, w9, w11, gt
+; CHECK-SD-NEXT:    cmp w10, w8
+; CHECK-SD-NEXT:    csel w8, w10, w8, lt
+; CHECK-SD-NEXT:    cmn w8, #64, lsl #12 // =262144
+; CHECK-SD-NEXT:    csel w8, w8, w11, gt
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f64_v2i19:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    adrp x8, .LCPI50_1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI50_1]
+; CHECK-GI-NEXT:    adrp x8, .LCPI50_0
+; CHECK-GI-NEXT:    cmgt v2.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI50_0]
+; CHECK-GI-NEXT:    cmgt v2.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i19> @llvm.fptosi.sat.v2f64.v2i19(<2 x double> %f)
     ret <2 x i19> %x
 }
 
 define <2 x i32> @test_signed_v2f64_v2i32_duplicate(<2 x double> %f) {
-; CHECK-LABEL: test_signed_v2f64_v2i32_duplicate:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    fcvtzs w8, d0
-; CHECK-NEXT:    fcvtzs w9, d1
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f64_v2i32_duplicate:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d1, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzs w8, d0
+; CHECK-SD-NEXT:    fcvtzs w9, d1
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f64_v2i32_duplicate:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    adrp x8, .LCPI51_1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI51_1]
+; CHECK-GI-NEXT:    adrp x8, .LCPI51_0
+; CHECK-GI-NEXT:    cmgt v2.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI51_0]
+; CHECK-GI-NEXT:    cmgt v2.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i32> @llvm.fptosi.sat.v2f64.v2i32(<2 x double> %f)
     ret <2 x i32> %x
 }
 
 define <2 x i50> @test_signed_v2f64_v2i50(<2 x double> %f) {
-; CHECK-LABEL: test_signed_v2f64_v2i50:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
-; CHECK-NEXT:    fcvtzs x10, d0
-; CHECK-NEXT:    mov x11, #-562949953421312 // =0xfffe000000000000
-; CHECK-NEXT:    fcvtzs x9, d1
-; CHECK-NEXT:    cmp x9, x8
-; CHECK-NEXT:    csel x9, x9, x8, lt
-; CHECK-NEXT:    cmp x9, x11
-; CHECK-NEXT:    csel x9, x9, x11, gt
-; CHECK-NEXT:    cmp x10, x8
-; CHECK-NEXT:    csel x8, x10, x8, lt
-; CHECK-NEXT:    cmp x8, x11
-; CHECK-NEXT:    csel x8, x8, x11, gt
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov v0.d[1], x9
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f64_v2i50:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d1, v0.d[1]
+; CHECK-SD-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
+; CHECK-SD-NEXT:    fcvtzs x10, d0
+; CHECK-SD-NEXT:    mov x11, #-562949953421312 // =0xfffe000000000000
+; CHECK-SD-NEXT:    fcvtzs x9, d1
+; CHECK-SD-NEXT:    cmp x9, x8
+; CHECK-SD-NEXT:    csel x9, x9, x8, lt
+; CHECK-SD-NEXT:    cmp x9, x11
+; CHECK-SD-NEXT:    csel x9, x9, x11, gt
+; CHECK-SD-NEXT:    cmp x10, x8
+; CHECK-SD-NEXT:    csel x8, x10, x8, lt
+; CHECK-SD-NEXT:    cmp x8, x11
+; CHECK-SD-NEXT:    csel x8, x8, x11, gt
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    mov v0.d[1], x9
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f64_v2i50:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    adrp x8, .LCPI52_1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI52_1]
+; CHECK-GI-NEXT:    adrp x8, .LCPI52_0
+; CHECK-GI-NEXT:    cmgt v2.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI52_0]
+; CHECK-GI-NEXT:    cmgt v2.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i50> @llvm.fptosi.sat.v2f64.v2i50(<2 x double> %f)
     ret <2 x i50> %x
 }
@@ -1432,125 +2679,241 @@ define <2 x i64> @test_signed_v2f64_v2i64(<2 x double> %f) {
 }
 
 define <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
-; CHECK-LABEL: test_signed_v2f64_v2i100:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #80
-; CHECK-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w22, -32
-; CHECK-NEXT:    .cfi_offset w30, -40
-; CHECK-NEXT:    .cfi_offset b8, -48
-; CHECK-NEXT:    .cfi_offset b9, -56
-; CHECK-NEXT:    .cfi_offset b10, -64
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    mov x8, #-4170333254945079296 // =0xc620000000000000
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov x21, #-34359738368 // =0xfffffff800000000
-; CHECK-NEXT:    fmov d9, x8
-; CHECK-NEXT:    mov x8, #5053038781909696511 // =0x461fffffffffffff
-; CHECK-NEXT:    mov x22, #34359738367 // =0x7ffffffff
-; CHECK-NEXT:    fmov d10, x8
-; CHECK-NEXT:    mov d8, v0.d[1]
-; CHECK-NEXT:    fcmp d0, d9
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x21, x1, lt
-; CHECK-NEXT:    fcmp d0, d10
-; CHECK-NEXT:    csel x9, x22, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp d0, d0
-; CHECK-NEXT:    fmov d0, d8
-; CHECK-NEXT:    csel x19, xzr, x8, vs
-; CHECK-NEXT:    csel x20, xzr, x9, vs
-; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    fcmp d8, d9
-; CHECK-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x21, x1, lt
-; CHECK-NEXT:    fcmp d8, d10
-; CHECK-NEXT:    mov x0, x19
-; CHECK-NEXT:    mov x1, x20
-; CHECK-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x9, x22, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp d8, d8
-; CHECK-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x2, xzr, x8, vs
-; CHECK-NEXT:    csel x3, xzr, x9, vs
-; CHECK-NEXT:    add sp, sp, #80
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f64_v2i100:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #80
+; CHECK-SD-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w30, -40
+; CHECK-SD-NEXT:    .cfi_offset b8, -48
+; CHECK-SD-NEXT:    .cfi_offset b9, -56
+; CHECK-SD-NEXT:    .cfi_offset b10, -64
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    bl __fixdfti
+; CHECK-SD-NEXT:    mov x8, #-4170333254945079296 // =0xc620000000000000
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov x21, #-34359738368 // =0xfffffff800000000
+; CHECK-SD-NEXT:    fmov d9, x8
+; CHECK-SD-NEXT:    mov x8, #5053038781909696511 // =0x461fffffffffffff
+; CHECK-SD-NEXT:    mov x22, #34359738367 // =0x7ffffffff
+; CHECK-SD-NEXT:    fmov d10, x8
+; CHECK-SD-NEXT:    mov d8, v0.d[1]
+; CHECK-SD-NEXT:    fcmp d0, d9
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x21, x1, lt
+; CHECK-SD-NEXT:    fcmp d0, d10
+; CHECK-SD-NEXT:    csel x9, x22, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp d0, d0
+; CHECK-SD-NEXT:    fmov d0, d8
+; CHECK-SD-NEXT:    csel x19, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x20, xzr, x9, vs
+; CHECK-SD-NEXT:    bl __fixdfti
+; CHECK-SD-NEXT:    fcmp d8, d9
+; CHECK-SD-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x21, x1, lt
+; CHECK-SD-NEXT:    fcmp d8, d10
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
+; CHECK-SD-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x9, x22, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp d8, d8
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x2, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x3, xzr, x9, vs
+; CHECK-SD-NEXT:    add sp, sp, #80
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f64_v2i100:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #80
+; CHECK-GI-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w21, -24
+; CHECK-GI-NEXT:    .cfi_offset w22, -32
+; CHECK-GI-NEXT:    .cfi_offset w30, -40
+; CHECK-GI-NEXT:    .cfi_offset b8, -48
+; CHECK-GI-NEXT:    .cfi_offset b9, -56
+; CHECK-GI-NEXT:    .cfi_offset b10, -64
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    bl __fixdfti
+; CHECK-GI-NEXT:    mov x8, #-4170333254945079296 // =0xc620000000000000
+; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov x21, #34359738368 // =0x800000000
+; CHECK-GI-NEXT:    fmov d9, x8
+; CHECK-GI-NEXT:    mov x8, #5053038781909696511 // =0x461fffffffffffff
+; CHECK-GI-NEXT:    mov x22, #34359738367 // =0x7ffffffff
+; CHECK-GI-NEXT:    fmov d10, x8
+; CHECK-GI-NEXT:    fcmp d0, d9
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, x21, x1, lt
+; CHECK-GI-NEXT:    fcmp d0, d10
+; CHECK-GI-NEXT:    csinv x8, x8, xzr, le
+; CHECK-GI-NEXT:    csel x9, x22, x9, gt
+; CHECK-GI-NEXT:    fcmp d0, d0
+; CHECK-GI-NEXT:    fmov d0, d8
+; CHECK-GI-NEXT:    csel x19, xzr, x8, vs
+; CHECK-GI-NEXT:    csel x20, xzr, x9, vs
+; CHECK-GI-NEXT:    bl __fixdfti
+; CHECK-GI-NEXT:    fcmp d8, d9
+; CHECK-GI-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, x21, x1, lt
+; CHECK-GI-NEXT:    fcmp d8, d10
+; CHECK-GI-NEXT:    mov x0, x19
+; CHECK-GI-NEXT:    mov x1, x20
+; CHECK-GI-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csinv x8, x8, xzr, le
+; CHECK-GI-NEXT:    csel x9, x22, x9, gt
+; CHECK-GI-NEXT:    fcmp d8, d8
+; CHECK-GI-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csel x2, xzr, x8, vs
+; CHECK-GI-NEXT:    csel x3, xzr, x9, vs
+; CHECK-GI-NEXT:    add sp, sp, #80
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i100> @llvm.fptosi.sat.v2f64.v2i100(<2 x double> %f)
     ret <2 x i100> %x
 }
 
 define <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) {
-; CHECK-LABEL: test_signed_v2f64_v2i128:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #80
-; CHECK-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w22, -32
-; CHECK-NEXT:    .cfi_offset w30, -40
-; CHECK-NEXT:    .cfi_offset b8, -48
-; CHECK-NEXT:    .cfi_offset b9, -56
-; CHECK-NEXT:    .cfi_offset b10, -64
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    mov x8, #-4044232465378705408 // =0xc7e0000000000000
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov x21, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT:    fmov d9, x8
-; CHECK-NEXT:    mov x8, #5179139571476070399 // =0x47dfffffffffffff
-; CHECK-NEXT:    mov x22, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT:    fmov d10, x8
-; CHECK-NEXT:    mov d8, v0.d[1]
-; CHECK-NEXT:    fcmp d0, d9
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x21, x1, lt
-; CHECK-NEXT:    fcmp d0, d10
-; CHECK-NEXT:    csel x9, x22, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp d0, d0
-; CHECK-NEXT:    fmov d0, d8
-; CHECK-NEXT:    csel x19, xzr, x8, vs
-; CHECK-NEXT:    csel x20, xzr, x9, vs
-; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    fcmp d8, d9
-; CHECK-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x21, x1, lt
-; CHECK-NEXT:    fcmp d8, d10
-; CHECK-NEXT:    mov x0, x19
-; CHECK-NEXT:    mov x1, x20
-; CHECK-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x9, x22, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp d8, d8
-; CHECK-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x2, xzr, x8, vs
-; CHECK-NEXT:    csel x3, xzr, x9, vs
-; CHECK-NEXT:    add sp, sp, #80
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v2f64_v2i128:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #80
+; CHECK-SD-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w30, -40
+; CHECK-SD-NEXT:    .cfi_offset b8, -48
+; CHECK-SD-NEXT:    .cfi_offset b9, -56
+; CHECK-SD-NEXT:    .cfi_offset b10, -64
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    bl __fixdfti
+; CHECK-SD-NEXT:    mov x8, #-4044232465378705408 // =0xc7e0000000000000
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov x21, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT:    fmov d9, x8
+; CHECK-SD-NEXT:    mov x8, #5179139571476070399 // =0x47dfffffffffffff
+; CHECK-SD-NEXT:    mov x22, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT:    fmov d10, x8
+; CHECK-SD-NEXT:    mov d8, v0.d[1]
+; CHECK-SD-NEXT:    fcmp d0, d9
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x21, x1, lt
+; CHECK-SD-NEXT:    fcmp d0, d10
+; CHECK-SD-NEXT:    csel x9, x22, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp d0, d0
+; CHECK-SD-NEXT:    fmov d0, d8
+; CHECK-SD-NEXT:    csel x19, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x20, xzr, x9, vs
+; CHECK-SD-NEXT:    bl __fixdfti
+; CHECK-SD-NEXT:    fcmp d8, d9
+; CHECK-SD-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x21, x1, lt
+; CHECK-SD-NEXT:    fcmp d8, d10
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
+; CHECK-SD-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x9, x22, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp d8, d8
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x2, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x3, xzr, x9, vs
+; CHECK-SD-NEXT:    add sp, sp, #80
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v2f64_v2i128:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #80
+; CHECK-GI-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w21, -24
+; CHECK-GI-NEXT:    .cfi_offset w22, -32
+; CHECK-GI-NEXT:    .cfi_offset w30, -40
+; CHECK-GI-NEXT:    .cfi_offset b8, -48
+; CHECK-GI-NEXT:    .cfi_offset b9, -56
+; CHECK-GI-NEXT:    .cfi_offset b10, -64
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    bl __fixdfti
+; CHECK-GI-NEXT:    mov x8, #-4044232465378705408 // =0xc7e0000000000000
+; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov x21, #-9223372036854775808 // =0x8000000000000000
+; CHECK-GI-NEXT:    fmov d9, x8
+; CHECK-GI-NEXT:    mov x8, #5179139571476070399 // =0x47dfffffffffffff
+; CHECK-GI-NEXT:    mov x22, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-GI-NEXT:    fmov d10, x8
+; CHECK-GI-NEXT:    fcmp d0, d9
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, x21, x1, lt
+; CHECK-GI-NEXT:    fcmp d0, d10
+; CHECK-GI-NEXT:    csinv x8, x8, xzr, le
+; CHECK-GI-NEXT:    csel x9, x22, x9, gt
+; CHECK-GI-NEXT:    fcmp d0, d0
+; CHECK-GI-NEXT:    fmov d0, d8
+; CHECK-GI-NEXT:    csel x19, xzr, x8, vs
+; CHECK-GI-NEXT:    csel x20, xzr, x9, vs
+; CHECK-GI-NEXT:    bl __fixdfti
+; CHECK-GI-NEXT:    fcmp d8, d9
+; CHECK-GI-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, x21, x1, lt
+; CHECK-GI-NEXT:    fcmp d8, d10
+; CHECK-GI-NEXT:    mov x0, x19
+; CHECK-GI-NEXT:    mov x1, x20
+; CHECK-GI-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csinv x8, x8, xzr, le
+; CHECK-GI-NEXT:    csel x9, x22, x9, gt
+; CHECK-GI-NEXT:    fcmp d8, d8
+; CHECK-GI-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csel x2, xzr, x8, vs
+; CHECK-GI-NEXT:    csel x3, xzr, x9, vs
+; CHECK-GI-NEXT:    add sp, sp, #80
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i128> @llvm.fptosi.sat.v2f64.v2i128(<2 x double> %f)
     ret <2 x i128> %x
 }
@@ -1570,89 +2933,165 @@ declare <4 x i100> @llvm.fptosi.sat.v4f16.v4i100(<4 x half>)
 declare <4 x i128> @llvm.fptosi.sat.v4f16.v4i128(<4 x half>)
 
 define <4 x i1> @test_signed_v4f16_v4i1(<4 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v4f16_v4i1:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-CVT-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT:    movi v1.2d, #0xffffffffffffffff
-; CHECK-CVT-NEXT:    smax v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT:    xtn v0.4h, v0.4s
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_v4f16_v4i1:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-FP16-NEXT:    fcvtzs v0.4h, v0.4h
-; CHECK-FP16-NEXT:    movi v2.2d, #0xffffffffffffffff
-; CHECK-FP16-NEXT:    smin v0.4h, v0.4h, v1.4h
-; CHECK-FP16-NEXT:    smax v0.4h, v0.4h, v2.4h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_v4f16_v4i1:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-SD-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    movi v1.2d, #0xffffffffffffffff
+; CHECK-SD-CVT-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_v4f16_v4i1:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-SD-FP16-NEXT:    fcvtzs v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT:    movi v2.2d, #0xffffffffffffffff
+; CHECK-SD-FP16-NEXT:    smin v0.4h, v0.4h, v1.4h
+; CHECK-SD-FP16-NEXT:    smax v0.4h, v0.4h, v2.4h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v4f16_v4i1:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-GI-CVT-NEXT:    movi v2.2d, #0xffffffffffffffff
+; CHECK-GI-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    smax v0.4s, v0.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i1:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT:    movi d2, #0xffffffffffffffff
+; CHECK-GI-FP16-NEXT:    smin v0.4h, v0.4h, v1.4h
+; CHECK-GI-FP16-NEXT:    smax v0.4h, v0.4h, v2.4h
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <4 x i1> @llvm.fptosi.sat.v4f16.v4i1(<4 x half> %f)
     ret <4 x i1> %x
 }
 
 define <4 x i8> @test_signed_v4f16_v4i8(<4 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v4f16_v4i8:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    movi v1.4s, #127
-; CHECK-CVT-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT:    mvni v1.4s, #127
-; CHECK-CVT-NEXT:    smax v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT:    xtn v0.4h, v0.4s
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_v4f16_v4i8:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    movi v1.4h, #127
-; CHECK-FP16-NEXT:    fcvtzs v0.4h, v0.4h
-; CHECK-FP16-NEXT:    smin v0.4h, v0.4h, v1.4h
-; CHECK-FP16-NEXT:    mvni v1.4h, #127
-; CHECK-FP16-NEXT:    smax v0.4h, v0.4h, v1.4h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_v4f16_v4i8:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    movi v1.4s, #127
+; CHECK-SD-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    mvni v1.4s, #127
+; CHECK-SD-CVT-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_v4f16_v4i8:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    movi v1.4h, #127
+; CHECK-SD-FP16-NEXT:    fcvtzs v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT:    smin v0.4h, v0.4h, v1.4h
+; CHECK-SD-FP16-NEXT:    mvni v1.4h, #127
+; CHECK-SD-FP16-NEXT:    smax v0.4h, v0.4h, v1.4h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v4f16_v4i8:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    movi v1.4s, #127
+; CHECK-GI-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    mvni v1.4s, #127
+; CHECK-GI-CVT-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i8:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    movi v1.4h, #127
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT:    mvni v2.4h, #127
+; CHECK-GI-FP16-NEXT:    smin v0.4h, v0.4h, v1.4h
+; CHECK-GI-FP16-NEXT:    smax v0.4h, v0.4h, v2.4h
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <4 x i8> @llvm.fptosi.sat.v4f16.v4i8(<4 x half> %f)
     ret <4 x i8> %x
 }
 
 define <4 x i13> @test_signed_v4f16_v4i13(<4 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v4f16_v4i13:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    movi v1.4s, #15, msl #8
-; CHECK-CVT-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT:    mvni v1.4s, #15, msl #8
-; CHECK-CVT-NEXT:    smax v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT:    xtn v0.4h, v0.4s
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_v4f16_v4i13:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs v0.4h, v0.4h
-; CHECK-FP16-NEXT:    mvni v1.4h, #240, lsl #8
-; CHECK-FP16-NEXT:    movi v2.4h, #240, lsl #8
-; CHECK-FP16-NEXT:    smin v0.4h, v0.4h, v1.4h
-; CHECK-FP16-NEXT:    smax v0.4h, v0.4h, v2.4h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_v4f16_v4i13:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    movi v1.4s, #15, msl #8
+; CHECK-SD-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    mvni v1.4s, #15, msl #8
+; CHECK-SD-CVT-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_v4f16_v4i13:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT:    mvni v1.4h, #240, lsl #8
+; CHECK-SD-FP16-NEXT:    movi v2.4h, #240, lsl #8
+; CHECK-SD-FP16-NEXT:    smin v0.4h, v0.4h, v1.4h
+; CHECK-SD-FP16-NEXT:    smax v0.4h, v0.4h, v2.4h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v4f16_v4i13:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    movi v1.4s, #15, msl #8
+; CHECK-GI-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    mvni v1.4s, #15, msl #8
+; CHECK-GI-CVT-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i13:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT:    mvni v1.4h, #240, lsl #8
+; CHECK-GI-FP16-NEXT:    movi v2.4h, #240, lsl #8
+; CHECK-GI-FP16-NEXT:    smin v0.4h, v0.4h, v1.4h
+; CHECK-GI-FP16-NEXT:    smax v0.4h, v0.4h, v2.4h
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <4 x i13> @llvm.fptosi.sat.v4f16.v4i13(<4 x half> %f)
     ret <4 x i13> %x
 }
 
 define <4 x i16> @test_signed_v4f16_v4i16(<4 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v4f16_v4i16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT:    sqxtn v0.4h, v0.4s
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_v4f16_v4i16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs v0.4h, v0.4h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_v4f16_v4i16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-CVT-NEXT:    sqxtn v0.4h, v0.4s
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_v4f16_v4i16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v4f16_v4i16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    movi v1.4s, #127, msl #8
+; CHECK-GI-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    mvni v1.4s, #127, msl #8
+; CHECK-GI-CVT-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <4 x i16> @llvm.fptosi.sat.v4f16.v4i16(<4 x half> %f)
     ret <4 x i16> %x
 }
@@ -1682,317 +3121,478 @@ define <4 x i32> @test_signed_v4f16_v4i32_duplicate(<4 x half> %f) {
 }
 
 define <4 x i50> @test_signed_v4f16_v4i50(<4 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v4f16_v4i50:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-CVT-NEXT:    mov h1, v0.h[1]
-; CHECK-CVT-NEXT:    fcvt s2, h0
-; CHECK-CVT-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
-; CHECK-CVT-NEXT:    mov h3, v0.h[2]
-; CHECK-CVT-NEXT:    mov h0, v0.h[3]
-; CHECK-CVT-NEXT:    mov x11, #-562949953421312 // =0xfffe000000000000
-; CHECK-CVT-NEXT:    fcvt s1, h1
-; CHECK-CVT-NEXT:    fcvtzs x9, s2
-; CHECK-CVT-NEXT:    fcvt s2, h3
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzs x10, s1
-; CHECK-CVT-NEXT:    cmp x9, x8
-; CHECK-CVT-NEXT:    csel x9, x9, x8, lt
-; CHECK-CVT-NEXT:    fcvtzs x12, s2
-; CHECK-CVT-NEXT:    cmp x9, x11
-; CHECK-CVT-NEXT:    csel x0, x9, x11, gt
-; CHECK-CVT-NEXT:    cmp x10, x8
-; CHECK-CVT-NEXT:    csel x9, x10, x8, lt
-; CHECK-CVT-NEXT:    fcvtzs x10, s0
-; CHECK-CVT-NEXT:    cmp x9, x11
-; CHECK-CVT-NEXT:    csel x1, x9, x11, gt
-; CHECK-CVT-NEXT:    cmp x12, x8
-; CHECK-CVT-NEXT:    csel x9, x12, x8, lt
-; CHECK-CVT-NEXT:    cmp x9, x11
-; CHECK-CVT-NEXT:    csel x2, x9, x11, gt
-; CHECK-CVT-NEXT:    cmp x10, x8
-; CHECK-CVT-NEXT:    csel x8, x10, x8, lt
-; CHECK-CVT-NEXT:    cmp x8, x11
-; CHECK-CVT-NEXT:    csel x3, x8, x11, gt
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_v4f16_v4i50:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT:    mov h1, v0.h[1]
-; CHECK-FP16-NEXT:    fcvtzs x9, h0
-; CHECK-FP16-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
-; CHECK-FP16-NEXT:    mov h2, v0.h[2]
-; CHECK-FP16-NEXT:    mov x11, #-562949953421312 // =0xfffe000000000000
-; CHECK-FP16-NEXT:    mov h0, v0.h[3]
-; CHECK-FP16-NEXT:    fcvtzs x10, h1
-; CHECK-FP16-NEXT:    cmp x9, x8
-; CHECK-FP16-NEXT:    csel x9, x9, x8, lt
-; CHECK-FP16-NEXT:    fcvtzs x12, h2
-; CHECK-FP16-NEXT:    cmp x9, x11
-; CHECK-FP16-NEXT:    csel x0, x9, x11, gt
-; CHECK-FP16-NEXT:    cmp x10, x8
-; CHECK-FP16-NEXT:    csel x9, x10, x8, lt
-; CHECK-FP16-NEXT:    fcvtzs x10, h0
-; CHECK-FP16-NEXT:    cmp x9, x11
-; CHECK-FP16-NEXT:    csel x1, x9, x11, gt
-; CHECK-FP16-NEXT:    cmp x12, x8
-; CHECK-FP16-NEXT:    csel x9, x12, x8, lt
-; CHECK-FP16-NEXT:    cmp x9, x11
-; CHECK-FP16-NEXT:    csel x2, x9, x11, gt
-; CHECK-FP16-NEXT:    cmp x10, x8
-; CHECK-FP16-NEXT:    csel x8, x10, x8, lt
-; CHECK-FP16-NEXT:    cmp x8, x11
-; CHECK-FP16-NEXT:    csel x3, x8, x11, gt
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_v4f16_v4i50:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-CVT-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-CVT-NEXT:    fcvt s2, h0
+; CHECK-SD-CVT-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
+; CHECK-SD-CVT-NEXT:    mov h3, v0.h[2]
+; CHECK-SD-CVT-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-CVT-NEXT:    mov x11, #-562949953421312 // =0xfffe000000000000
+; CHECK-SD-CVT-NEXT:    fcvt s1, h1
+; CHECK-SD-CVT-NEXT:    fcvtzs x9, s2
+; CHECK-SD-CVT-NEXT:    fcvt s2, h3
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvtzs x10, s1
+; CHECK-SD-CVT-NEXT:    cmp x9, x8
+; CHECK-SD-CVT-NEXT:    csel x9, x9, x8, lt
+; CHECK-SD-CVT-NEXT:    fcvtzs x12, s2
+; CHECK-SD-CVT-NEXT:    cmp x9, x11
+; CHECK-SD-CVT-NEXT:    csel x0, x9, x11, gt
+; CHECK-SD-CVT-NEXT:    cmp x10, x8
+; CHECK-SD-CVT-NEXT:    csel x9, x10, x8, lt
+; CHECK-SD-CVT-NEXT:    fcvtzs x10, s0
+; CHECK-SD-CVT-NEXT:    cmp x9, x11
+; CHECK-SD-CVT-NEXT:    csel x1, x9, x11, gt
+; CHECK-SD-CVT-NEXT:    cmp x12, x8
+; CHECK-SD-CVT-NEXT:    csel x9, x12, x8, lt
+; CHECK-SD-CVT-NEXT:    cmp x9, x11
+; CHECK-SD-CVT-NEXT:    csel x2, x9, x11, gt
+; CHECK-SD-CVT-NEXT:    cmp x10, x8
+; CHECK-SD-CVT-NEXT:    csel x8, x10, x8, lt
+; CHECK-SD-CVT-NEXT:    cmp x8, x11
+; CHECK-SD-CVT-NEXT:    csel x3, x8, x11, gt
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_v4f16_v4i50:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-FP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-FP16-NEXT:    fcvtzs x9, h0
+; CHECK-SD-FP16-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
+; CHECK-SD-FP16-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-FP16-NEXT:    mov x11, #-562949953421312 // =0xfffe000000000000
+; CHECK-SD-FP16-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-FP16-NEXT:    fcvtzs x10, h1
+; CHECK-SD-FP16-NEXT:    cmp x9, x8
+; CHECK-SD-FP16-NEXT:    csel x9, x9, x8, lt
+; CHECK-SD-FP16-NEXT:    fcvtzs x12, h2
+; CHECK-SD-FP16-NEXT:    cmp x9, x11
+; CHECK-SD-FP16-NEXT:    csel x0, x9, x11, gt
+; CHECK-SD-FP16-NEXT:    cmp x10, x8
+; CHECK-SD-FP16-NEXT:    csel x9, x10, x8, lt
+; CHECK-SD-FP16-NEXT:    fcvtzs x10, h0
+; CHECK-SD-FP16-NEXT:    cmp x9, x11
+; CHECK-SD-FP16-NEXT:    csel x1, x9, x11, gt
+; CHECK-SD-FP16-NEXT:    cmp x12, x8
+; CHECK-SD-FP16-NEXT:    csel x9, x12, x8, lt
+; CHECK-SD-FP16-NEXT:    cmp x9, x11
+; CHECK-SD-FP16-NEXT:    csel x2, x9, x11, gt
+; CHECK-SD-FP16-NEXT:    cmp x10, x8
+; CHECK-SD-FP16-NEXT:    csel x8, x10, x8, lt
+; CHECK-SD-FP16-NEXT:    cmp x8, x11
+; CHECK-SD-FP16-NEXT:    csel x3, x8, x11, gt
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v4f16_v4i50:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-CVT-NEXT:    fcvt s2, h0
+; CHECK-GI-CVT-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
+; CHECK-GI-CVT-NEXT:    mov h3, v0.h[2]
+; CHECK-GI-CVT-NEXT:    mov h0, v0.h[3]
+; CHECK-GI-CVT-NEXT:    mov x11, #-562949953421312 // =0xfffe000000000000
+; CHECK-GI-CVT-NEXT:    fcvt s1, h1
+; CHECK-GI-CVT-NEXT:    fcvtzs x9, s2
+; CHECK-GI-CVT-NEXT:    fcvt s2, h3
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzs x10, s1
+; CHECK-GI-CVT-NEXT:    cmp x9, x8
+; CHECK-GI-CVT-NEXT:    csel x9, x9, x8, lt
+; CHECK-GI-CVT-NEXT:    fcvtzs x12, s2
+; CHECK-GI-CVT-NEXT:    cmp x9, x11
+; CHECK-GI-CVT-NEXT:    csel x0, x9, x11, gt
+; CHECK-GI-CVT-NEXT:    cmp x10, x8
+; CHECK-GI-CVT-NEXT:    csel x9, x10, x8, lt
+; CHECK-GI-CVT-NEXT:    fcvtzs x10, s0
+; CHECK-GI-CVT-NEXT:    cmp x9, x11
+; CHECK-GI-CVT-NEXT:    csel x1, x9, x11, gt
+; CHECK-GI-CVT-NEXT:    cmp x12, x8
+; CHECK-GI-CVT-NEXT:    csel x9, x12, x8, lt
+; CHECK-GI-CVT-NEXT:    cmp x9, x11
+; CHECK-GI-CVT-NEXT:    csel x2, x9, x11, gt
+; CHECK-GI-CVT-NEXT:    cmp x10, x8
+; CHECK-GI-CVT-NEXT:    csel x8, x10, x8, lt
+; CHECK-GI-CVT-NEXT:    cmp x8, x11
+; CHECK-GI-CVT-NEXT:    csel x3, x8, x11, gt
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i50:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT:    fcvtzs x9, h0
+; CHECK-GI-FP16-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT:    mov x11, #-562949953421312 // =0xfffe000000000000
+; CHECK-GI-FP16-NEXT:    mov h0, v0.h[3]
+; CHECK-GI-FP16-NEXT:    fcvtzs x10, h1
+; CHECK-GI-FP16-NEXT:    cmp x9, x8
+; CHECK-GI-FP16-NEXT:    csel x9, x9, x8, lt
+; CHECK-GI-FP16-NEXT:    fcvtzs x12, h2
+; CHECK-GI-FP16-NEXT:    cmp x9, x11
+; CHECK-GI-FP16-NEXT:    csel x0, x9, x11, gt
+; CHECK-GI-FP16-NEXT:    cmp x10, x8
+; CHECK-GI-FP16-NEXT:    csel x9, x10, x8, lt
+; CHECK-GI-FP16-NEXT:    fcvtzs x10, h0
+; CHECK-GI-FP16-NEXT:    cmp x9, x11
+; CHECK-GI-FP16-NEXT:    csel x1, x9, x11, gt
+; CHECK-GI-FP16-NEXT:    cmp x12, x8
+; CHECK-GI-FP16-NEXT:    csel x9, x12, x8, lt
+; CHECK-GI-FP16-NEXT:    cmp x9, x11
+; CHECK-GI-FP16-NEXT:    csel x2, x9, x11, gt
+; CHECK-GI-FP16-NEXT:    cmp x10, x8
+; CHECK-GI-FP16-NEXT:    csel x8, x10, x8, lt
+; CHECK-GI-FP16-NEXT:    cmp x8, x11
+; CHECK-GI-FP16-NEXT:    csel x3, x8, x11, gt
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <4 x i50> @llvm.fptosi.sat.v4f16.v4i50(<4 x half> %f)
     ret <4 x i50> %x
 }
 
 define <4 x i64> @test_signed_v4f16_v4i64(<4 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v4f16_v4i64:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-CVT-NEXT:    mov h1, v0.h[2]
-; CHECK-CVT-NEXT:    mov h2, v0.h[1]
-; CHECK-CVT-NEXT:    mov h3, v0.h[3]
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvt s1, h1
-; CHECK-CVT-NEXT:    fcvt s2, h2
-; CHECK-CVT-NEXT:    fcvt s3, h3
-; CHECK-CVT-NEXT:    fcvtzs x8, s0
-; CHECK-CVT-NEXT:    fcvtzs x9, s1
-; CHECK-CVT-NEXT:    fcvtzs x10, s2
-; CHECK-CVT-NEXT:    fcvtzs x11, s3
-; CHECK-CVT-NEXT:    fmov d0, x8
-; CHECK-CVT-NEXT:    fmov d1, x9
-; CHECK-CVT-NEXT:    mov v0.d[1], x10
-; CHECK-CVT-NEXT:    mov v1.d[1], x11
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_v4f16_v4i64:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT:    mov h1, v0.h[2]
-; CHECK-FP16-NEXT:    mov h2, v0.h[1]
-; CHECK-FP16-NEXT:    mov h3, v0.h[3]
-; CHECK-FP16-NEXT:    fcvtzs x8, h0
-; CHECK-FP16-NEXT:    fcvtzs x9, h1
-; CHECK-FP16-NEXT:    fcvtzs x10, h2
-; CHECK-FP16-NEXT:    fcvtzs x11, h3
-; CHECK-FP16-NEXT:    fmov d0, x8
-; CHECK-FP16-NEXT:    fmov d1, x9
-; CHECK-FP16-NEXT:    mov v0.d[1], x10
-; CHECK-FP16-NEXT:    mov v1.d[1], x11
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_v4f16_v4i64:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-CVT-NEXT:    mov h1, v0.h[2]
+; CHECK-SD-CVT-NEXT:    mov h2, v0.h[1]
+; CHECK-SD-CVT-NEXT:    mov h3, v0.h[3]
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvt s1, h1
+; CHECK-SD-CVT-NEXT:    fcvt s2, h2
+; CHECK-SD-CVT-NEXT:    fcvt s3, h3
+; CHECK-SD-CVT-NEXT:    fcvtzs x8, s0
+; CHECK-SD-CVT-NEXT:    fcvtzs x9, s1
+; CHECK-SD-CVT-NEXT:    fcvtzs x10, s2
+; CHECK-SD-CVT-NEXT:    fcvtzs x11, s3
+; CHECK-SD-CVT-NEXT:    fmov d0, x8
+; CHECK-SD-CVT-NEXT:    fmov d1, x9
+; CHECK-SD-CVT-NEXT:    mov v0.d[1], x10
+; CHECK-SD-CVT-NEXT:    mov v1.d[1], x11
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_v4f16_v4i64:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-FP16-NEXT:    mov h1, v0.h[2]
+; CHECK-SD-FP16-NEXT:    mov h2, v0.h[1]
+; CHECK-SD-FP16-NEXT:    mov h3, v0.h[3]
+; CHECK-SD-FP16-NEXT:    fcvtzs x8, h0
+; CHECK-SD-FP16-NEXT:    fcvtzs x9, h1
+; CHECK-SD-FP16-NEXT:    fcvtzs x10, h2
+; CHECK-SD-FP16-NEXT:    fcvtzs x11, h3
+; CHECK-SD-FP16-NEXT:    fmov d0, x8
+; CHECK-SD-FP16-NEXT:    fmov d1, x9
+; CHECK-SD-FP16-NEXT:    mov v0.d[1], x10
+; CHECK-SD-FP16-NEXT:    mov v1.d[1], x11
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v4f16_v4i64:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    fcvtl v1.2d, v0.2s
+; CHECK-GI-CVT-NEXT:    fcvtl2 v2.2d, v0.4s
+; CHECK-GI-CVT-NEXT:    fcvtzs v0.2d, v1.2d
+; CHECK-GI-CVT-NEXT:    fcvtzs v1.2d, v2.2d
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i64:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-FP16-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[1]
+; CHECK-GI-FP16-NEXT:    fcvt d0, h0
+; CHECK-GI-FP16-NEXT:    mov h3, v1.h[1]
+; CHECK-GI-FP16-NEXT:    fcvt d2, h2
+; CHECK-GI-FP16-NEXT:    fcvt d1, h1
+; CHECK-GI-FP16-NEXT:    fcvt d3, h3
+; CHECK-GI-FP16-NEXT:    mov v0.d[1], v2.d[0]
+; CHECK-GI-FP16-NEXT:    mov v1.d[1], v3.d[0]
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <4 x i64> @llvm.fptosi.sat.v4f16.v4i64(<4 x half> %f)
     ret <4 x i64> %x
 }
 
 define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
-; CHECK-LABEL: test_signed_v4f16_v4i100:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #112
-; CHECK-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x26, x25, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w22, -32
-; CHECK-NEXT:    .cfi_offset w23, -40
-; CHECK-NEXT:    .cfi_offset w24, -48
-; CHECK-NEXT:    .cfi_offset w25, -56
-; CHECK-NEXT:    .cfi_offset w26, -64
-; CHECK-NEXT:    .cfi_offset w30, -72
-; CHECK-NEXT:    .cfi_offset b8, -80
-; CHECK-NEXT:    .cfi_offset b9, -88
-; CHECK-NEXT:    .cfi_offset b10, -96
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    movi v9.2s, #241, lsl #24
-; CHECK-NEXT:    mov w8, #1895825407 // =0x70ffffff
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    fmov s10, w8
-; CHECK-NEXT:    mov x25, #-34359738368 // =0xfffffff800000000
-; CHECK-NEXT:    mov x26, #34359738367 // =0x7ffffffff
-; CHECK-NEXT:    mov h0, v0.h[1]
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x25, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    csel x9, x26, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x19, xzr, x8, vs
-; CHECK-NEXT:    csel x20, xzr, x9, vs
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov h0, v0.h[2]
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x25, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    csel x9, x26, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x21, xzr, x8, vs
-; CHECK-NEXT:    csel x22, xzr, x9, vs
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov h0, v0.h[3]
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x25, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    csel x9, x26, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x23, xzr, x8, vs
-; CHECK-NEXT:    csel x24, xzr, x9, vs
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    mov x2, x21
-; CHECK-NEXT:    mov x3, x22
-; CHECK-NEXT:    mov x4, x23
-; CHECK-NEXT:    mov x5, x24
-; CHECK-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x25, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    mov x0, x19
-; CHECK-NEXT:    mov x1, x20
-; CHECK-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x9, x26, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x26, x25, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x6, xzr, x8, vs
-; CHECK-NEXT:    csel x7, xzr, x9, vs
-; CHECK-NEXT:    add sp, sp, #112
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v4f16_v4i100:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #112
+; CHECK-SD-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x26, x25, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w23, -40
+; CHECK-SD-NEXT:    .cfi_offset w24, -48
+; CHECK-SD-NEXT:    .cfi_offset w25, -56
+; CHECK-SD-NEXT:    .cfi_offset w26, -64
+; CHECK-SD-NEXT:    .cfi_offset w30, -72
+; CHECK-SD-NEXT:    .cfi_offset b8, -80
+; CHECK-SD-NEXT:    .cfi_offset b9, -88
+; CHECK-SD-NEXT:    .cfi_offset b10, -96
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    movi v9.2s, #241, lsl #24
+; CHECK-SD-NEXT:    mov w8, #1895825407 // =0x70ffffff
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fmov s10, w8
+; CHECK-SD-NEXT:    mov x25, #-34359738368 // =0xfffffff800000000
+; CHECK-SD-NEXT:    mov x26, #34359738367 // =0x7ffffffff
+; CHECK-SD-NEXT:    mov h0, v0.h[1]
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x25, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    csel x9, x26, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x19, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x20, xzr, x9, vs
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[2]
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x25, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    csel x9, x26, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x21, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x22, xzr, x9, vs
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x25, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    csel x9, x26, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x23, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x24, xzr, x9, vs
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    mov x2, x21
+; CHECK-SD-NEXT:    mov x3, x22
+; CHECK-SD-NEXT:    mov x4, x23
+; CHECK-SD-NEXT:    mov x5, x24
+; CHECK-SD-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x25, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
+; CHECK-SD-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x9, x26, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x26, x25, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x6, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x7, xzr, x9, vs
+; CHECK-SD-NEXT:    add sp, sp, #112
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v4f16_v4i100:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-CVT-NEXT:    mov x1, xzr
+; CHECK-GI-CVT-NEXT:    mov h3, v0.h[3]
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov x3, xzr
+; CHECK-GI-CVT-NEXT:    mov x5, xzr
+; CHECK-GI-CVT-NEXT:    mov x7, xzr
+; CHECK-GI-CVT-NEXT:    fcvt s1, h1
+; CHECK-GI-CVT-NEXT:    fcvt s2, h2
+; CHECK-GI-CVT-NEXT:    fcvt s3, h3
+; CHECK-GI-CVT-NEXT:    fcvtzs x0, s0
+; CHECK-GI-CVT-NEXT:    fcvtzs x2, s1
+; CHECK-GI-CVT-NEXT:    fcvtzs x4, s2
+; CHECK-GI-CVT-NEXT:    fcvtzs x6, s3
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i100:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT:    mov x1, xzr
+; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-GI-FP16-NEXT:    mov x3, xzr
+; CHECK-GI-FP16-NEXT:    mov x5, xzr
+; CHECK-GI-FP16-NEXT:    mov x7, xzr
+; CHECK-GI-FP16-NEXT:    fcvtzs x2, h1
+; CHECK-GI-FP16-NEXT:    fcvtzs x4, h2
+; CHECK-GI-FP16-NEXT:    fcvtzs x6, h3
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <4 x i100> @llvm.fptosi.sat.v4f16.v4i100(<4 x half> %f)
     ret <4 x i100> %x
 }
 
 define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
-; CHECK-LABEL: test_signed_v4f16_v4i128:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #112
-; CHECK-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x26, x25, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w22, -32
-; CHECK-NEXT:    .cfi_offset w23, -40
-; CHECK-NEXT:    .cfi_offset w24, -48
-; CHECK-NEXT:    .cfi_offset w25, -56
-; CHECK-NEXT:    .cfi_offset w26, -64
-; CHECK-NEXT:    .cfi_offset w30, -72
-; CHECK-NEXT:    .cfi_offset b8, -80
-; CHECK-NEXT:    .cfi_offset b9, -88
-; CHECK-NEXT:    .cfi_offset b10, -96
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    movi v9.2s, #255, lsl #24
-; CHECK-NEXT:    mov w8, #2130706431 // =0x7effffff
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    fmov s10, w8
-; CHECK-NEXT:    mov x25, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT:    mov x26, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT:    mov h0, v0.h[1]
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x25, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    csel x9, x26, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x19, xzr, x8, vs
-; CHECK-NEXT:    csel x20, xzr, x9, vs
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov h0, v0.h[2]
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x25, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    csel x9, x26, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x21, xzr, x8, vs
-; CHECK-NEXT:    csel x22, xzr, x9, vs
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov h0, v0.h[3]
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x25, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    csel x9, x26, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x23, xzr, x8, vs
-; CHECK-NEXT:    csel x24, xzr, x9, vs
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    mov x2, x21
-; CHECK-NEXT:    mov x3, x22
-; CHECK-NEXT:    mov x4, x23
-; CHECK-NEXT:    mov x5, x24
-; CHECK-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x25, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    mov x0, x19
-; CHECK-NEXT:    mov x1, x20
-; CHECK-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x9, x26, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x26, x25, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x6, xzr, x8, vs
-; CHECK-NEXT:    csel x7, xzr, x9, vs
-; CHECK-NEXT:    add sp, sp, #112
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v4f16_v4i128:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #112
+; CHECK-SD-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x26, x25, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w23, -40
+; CHECK-SD-NEXT:    .cfi_offset w24, -48
+; CHECK-SD-NEXT:    .cfi_offset w25, -56
+; CHECK-SD-NEXT:    .cfi_offset w26, -64
+; CHECK-SD-NEXT:    .cfi_offset w30, -72
+; CHECK-SD-NEXT:    .cfi_offset b8, -80
+; CHECK-SD-NEXT:    .cfi_offset b9, -88
+; CHECK-SD-NEXT:    .cfi_offset b10, -96
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    movi v9.2s, #255, lsl #24
+; CHECK-SD-NEXT:    mov w8, #2130706431 // =0x7effffff
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fmov s10, w8
+; CHECK-SD-NEXT:    mov x25, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT:    mov x26, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT:    mov h0, v0.h[1]
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x25, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    csel x9, x26, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x19, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x20, xzr, x9, vs
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[2]
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x25, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    csel x9, x26, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x21, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x22, xzr, x9, vs
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x25, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    csel x9, x26, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x23, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x24, xzr, x9, vs
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    mov x2, x21
+; CHECK-SD-NEXT:    mov x3, x22
+; CHECK-SD-NEXT:    mov x4, x23
+; CHECK-SD-NEXT:    mov x5, x24
+; CHECK-SD-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x25, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
+; CHECK-SD-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x9, x26, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x26, x25, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x6, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x7, xzr, x9, vs
+; CHECK-SD-NEXT:    add sp, sp, #112
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v4f16_v4i128:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-CVT-NEXT:    mov x1, xzr
+; CHECK-GI-CVT-NEXT:    mov h3, v0.h[3]
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov x3, xzr
+; CHECK-GI-CVT-NEXT:    mov x5, xzr
+; CHECK-GI-CVT-NEXT:    mov x7, xzr
+; CHECK-GI-CVT-NEXT:    fcvt s1, h1
+; CHECK-GI-CVT-NEXT:    fcvt s2, h2
+; CHECK-GI-CVT-NEXT:    fcvt s3, h3
+; CHECK-GI-CVT-NEXT:    fcvtzs x0, s0
+; CHECK-GI-CVT-NEXT:    fcvtzs x2, s1
+; CHECK-GI-CVT-NEXT:    fcvtzs x4, s2
+; CHECK-GI-CVT-NEXT:    fcvtzs x6, s3
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i128:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT:    mov x1, xzr
+; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-GI-FP16-NEXT:    mov x3, xzr
+; CHECK-GI-FP16-NEXT:    mov x5, xzr
+; CHECK-GI-FP16-NEXT:    mov x7, xzr
+; CHECK-GI-FP16-NEXT:    fcvtzs x2, h1
+; CHECK-GI-FP16-NEXT:    fcvtzs x4, h2
+; CHECK-GI-FP16-NEXT:    fcvtzs x6, h3
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <4 x i128> @llvm.fptosi.sat.v4f16.v4i128(<4 x half> %f)
     ret <4 x i128> %x
 }
@@ -2012,104 +3612,200 @@ declare <8 x i100> @llvm.fptosi.sat.v8f16.v8i100(<8 x half>)
 declare <8 x i128> @llvm.fptosi.sat.v8f16.v8i128(<8 x half>)
 
 define <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v8f16_v8i1:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-CVT-NEXT:    movi v3.2d, #0xffffffffffffffff
-; CHECK-CVT-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-CVT-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT:    smin v2.4s, v2.4s, v1.4s
-; CHECK-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT:    smax v1.4s, v2.4s, v3.4s
-; CHECK-CVT-NEXT:    smax v0.4s, v0.4s, v3.4s
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-CVT-NEXT:    xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_v8f16_v8i1:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-FP16-NEXT:    fcvtzs v0.8h, v0.8h
-; CHECK-FP16-NEXT:    movi v2.2d, #0xffffffffffffffff
-; CHECK-FP16-NEXT:    smin v0.8h, v0.8h, v1.8h
-; CHECK-FP16-NEXT:    smax v0.8h, v0.8h, v2.8h
-; CHECK-FP16-NEXT:    xtn v0.8b, v0.8h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_v8f16_v8i1:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
+; CHECK-SD-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-SD-CVT-NEXT:    movi v3.2d, #0xffffffffffffffff
+; CHECK-SD-CVT-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-CVT-NEXT:    smin v2.4s, v2.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    smax v1.4s, v2.4s, v3.4s
+; CHECK-SD-CVT-NEXT:    smax v0.4s, v0.4s, v3.4s
+; CHECK-SD-CVT-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-SD-CVT-NEXT:    xtn v0.8b, v0.8h
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_v8f16_v8i1:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-SD-FP16-NEXT:    fcvtzs v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    movi v2.2d, #0xffffffffffffffff
+; CHECK-SD-FP16-NEXT:    smin v0.8h, v0.8h, v1.8h
+; CHECK-SD-FP16-NEXT:    smax v0.8h, v0.8h, v2.8h
+; CHECK-SD-FP16-NEXT:    xtn v0.8b, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v8f16_v8i1:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-CVT-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-GI-CVT-NEXT:    movi v3.2d, #0xffffffffffffffff
+; CHECK-GI-CVT-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    smin v2.4s, v2.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    smax v1.4s, v2.4s, v3.4s
+; CHECK-GI-CVT-NEXT:    smax v0.4s, v0.4s, v3.4s
+; CHECK-GI-CVT-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-CVT-NEXT:    xtn v0.8b, v0.8h
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v8f16_v8i1:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    movi v2.2d, #0xffffffffffffffff
+; CHECK-GI-FP16-NEXT:    smin v0.8h, v0.8h, v1.8h
+; CHECK-GI-FP16-NEXT:    smax v0.8h, v0.8h, v2.8h
+; CHECK-GI-FP16-NEXT:    xtn v0.8b, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <8 x i1> @llvm.fptosi.sat.v8f16.v8i1(<8 x half> %f)
     ret <8 x i1> %x
 }
 
 define <8 x i8> @test_signed_v8f16_v8i8(<8 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v8f16_v8i8:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    movi v1.4s, #127
-; CHECK-CVT-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-CVT-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT:    smin v2.4s, v2.4s, v1.4s
-; CHECK-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT:    mvni v1.4s, #127
-; CHECK-CVT-NEXT:    smax v2.4s, v2.4s, v1.4s
-; CHECK-CVT-NEXT:    smax v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-CVT-NEXT:    xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_v8f16_v8i8:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs v0.8h, v0.8h
-; CHECK-FP16-NEXT:    sqxtn v0.8b, v0.8h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_v8f16_v8i8:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
+; CHECK-SD-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    movi v1.4s, #127
+; CHECK-SD-CVT-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-CVT-NEXT:    smin v2.4s, v2.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    mvni v1.4s, #127
+; CHECK-SD-CVT-NEXT:    smax v2.4s, v2.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-SD-CVT-NEXT:    xtn v0.8b, v0.8h
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_v8f16_v8i8:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    sqxtn v0.8b, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v8f16_v8i8:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-CVT-NEXT:    movi v1.4s, #127
+; CHECK-GI-CVT-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    smin v2.4s, v2.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    mvni v1.4s, #127
+; CHECK-GI-CVT-NEXT:    smax v2.4s, v2.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    uzp1 v0.8h, v2.8h, v0.8h
+; CHECK-GI-CVT-NEXT:    xtn v0.8b, v0.8h
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v8f16_v8i8:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    movi v1.8h, #127
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    mvni v2.8h, #127
+; CHECK-GI-FP16-NEXT:    smin v0.8h, v0.8h, v1.8h
+; CHECK-GI-FP16-NEXT:    smax v0.8h, v0.8h, v2.8h
+; CHECK-GI-FP16-NEXT:    xtn v0.8b, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <8 x i8> @llvm.fptosi.sat.v8f16.v8i8(<8 x half> %f)
     ret <8 x i8> %x
 }
 
 define <8 x i13> @test_signed_v8f16_v8i13(<8 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v8f16_v8i13:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    movi v1.4s, #15, msl #8
-; CHECK-CVT-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-CVT-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT:    smin v2.4s, v2.4s, v1.4s
-; CHECK-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT:    mvni v1.4s, #15, msl #8
-; CHECK-CVT-NEXT:    smax v2.4s, v2.4s, v1.4s
-; CHECK-CVT-NEXT:    smax v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_v8f16_v8i13:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs v0.8h, v0.8h
-; CHECK-FP16-NEXT:    mvni v1.8h, #240, lsl #8
-; CHECK-FP16-NEXT:    movi v2.8h, #240, lsl #8
-; CHECK-FP16-NEXT:    smin v0.8h, v0.8h, v1.8h
-; CHECK-FP16-NEXT:    smax v0.8h, v0.8h, v2.8h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_v8f16_v8i13:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
+; CHECK-SD-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    movi v1.4s, #15, msl #8
+; CHECK-SD-CVT-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-CVT-NEXT:    smin v2.4s, v2.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    mvni v1.4s, #15, msl #8
+; CHECK-SD-CVT-NEXT:    smax v2.4s, v2.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_v8f16_v8i13:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    mvni v1.8h, #240, lsl #8
+; CHECK-SD-FP16-NEXT:    movi v2.8h, #240, lsl #8
+; CHECK-SD-FP16-NEXT:    smin v0.8h, v0.8h, v1.8h
+; CHECK-SD-FP16-NEXT:    smax v0.8h, v0.8h, v2.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v8f16_v8i13:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-CVT-NEXT:    movi v1.4s, #15, msl #8
+; CHECK-GI-CVT-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    smin v2.4s, v2.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    mvni v1.4s, #15, msl #8
+; CHECK-GI-CVT-NEXT:    smax v2.4s, v2.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    uzp1 v0.8h, v2.8h, v0.8h
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v8f16_v8i13:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    mvni v1.8h, #240, lsl #8
+; CHECK-GI-FP16-NEXT:    movi v2.8h, #240, lsl #8
+; CHECK-GI-FP16-NEXT:    smin v0.8h, v0.8h, v1.8h
+; CHECK-GI-FP16-NEXT:    smax v0.8h, v0.8h, v2.8h
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <8 x i13> @llvm.fptosi.sat.v8f16.v8i13(<8 x half> %f)
     ret <8 x i13> %x
 }
 
 define <8 x i16> @test_signed_v8f16_v8i16(<8 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v8f16_v8i16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-CVT-NEXT:    sqxtn v0.4h, v1.4s
-; CHECK-CVT-NEXT:    fcvtzs v1.4s, v2.4s
-; CHECK-CVT-NEXT:    sqxtn2 v0.8h, v1.4s
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_v8f16_v8i16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs v0.8h, v0.8h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_v8f16_v8i16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
+; CHECK-SD-CVT-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    sqxtn v0.4h, v1.4s
+; CHECK-SD-CVT-NEXT:    fcvtzs v1.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    sqxtn2 v0.8h, v1.4s
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_v8f16_v8i16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v8f16_v8i16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-CVT-NEXT:    movi v1.4s, #127, msl #8
+; CHECK-GI-CVT-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    smin v2.4s, v2.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    mvni v1.4s, #127, msl #8
+; CHECK-GI-CVT-NEXT:    smax v2.4s, v2.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    uzp1 v0.8h, v2.8h, v0.8h
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v8f16_v8i16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <8 x i16> @llvm.fptosi.sat.v8f16.v8i16(<8 x half> %f)
     ret <8 x i16> %x
 }
@@ -2141,394 +3837,663 @@ define <8 x i19> @test_signed_v8f16_v8i19(<8 x half> %f) {
 }
 
 define <8 x i32> @test_signed_v8f16_v8i32_duplicate(<8 x half> %f) {
-; CHECK-LABEL: test_signed_v8f16_v8i32_duplicate:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtl2 v1.4s, v0.8h
-; CHECK-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v8f16_v8i32_duplicate:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtl2 v1.4s, v0.8h
+; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v8f16_v8i32_duplicate:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-GI-NEXT:    fcvtl2 v2.4s, v0.8h
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v2.4s
+; CHECK-GI-NEXT:    ret
     %x = call <8 x i32> @llvm.fptosi.sat.v8f16.v8i32(<8 x half> %f)
     ret <8 x i32> %x
 }
 
 define <8 x i50> @test_signed_v8f16_v8i50(<8 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v8f16_v8i50:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-CVT-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
-; CHECK-CVT-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
-; CHECK-CVT-NEXT:    mov h2, v1.h[1]
-; CHECK-CVT-NEXT:    fcvt s3, h1
-; CHECK-CVT-NEXT:    mov h4, v1.h[2]
-; CHECK-CVT-NEXT:    mov h1, v1.h[3]
-; CHECK-CVT-NEXT:    fcvt s2, h2
-; CHECK-CVT-NEXT:    fcvtzs x10, s3
-; CHECK-CVT-NEXT:    fcvt s3, h4
-; CHECK-CVT-NEXT:    fcvt s1, h1
-; CHECK-CVT-NEXT:    fcvtzs x11, s2
-; CHECK-CVT-NEXT:    cmp x10, x8
-; CHECK-CVT-NEXT:    fcvtzs x12, s3
-; CHECK-CVT-NEXT:    csel x10, x10, x8, lt
-; CHECK-CVT-NEXT:    mov h2, v0.h[1]
-; CHECK-CVT-NEXT:    fcvt s3, h0
-; CHECK-CVT-NEXT:    cmp x10, x9
-; CHECK-CVT-NEXT:    csel x4, x10, x9, gt
-; CHECK-CVT-NEXT:    cmp x11, x8
-; CHECK-CVT-NEXT:    csel x10, x11, x8, lt
-; CHECK-CVT-NEXT:    fcvtzs x11, s1
-; CHECK-CVT-NEXT:    mov h1, v0.h[2]
-; CHECK-CVT-NEXT:    cmp x10, x9
-; CHECK-CVT-NEXT:    fcvt s2, h2
-; CHECK-CVT-NEXT:    mov h0, v0.h[3]
-; CHECK-CVT-NEXT:    csel x5, x10, x9, gt
-; CHECK-CVT-NEXT:    cmp x12, x8
-; CHECK-CVT-NEXT:    csel x10, x12, x8, lt
-; CHECK-CVT-NEXT:    fcvtzs x12, s3
-; CHECK-CVT-NEXT:    cmp x10, x9
-; CHECK-CVT-NEXT:    fcvt s1, h1
-; CHECK-CVT-NEXT:    csel x6, x10, x9, gt
-; CHECK-CVT-NEXT:    cmp x11, x8
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    csel x10, x11, x8, lt
-; CHECK-CVT-NEXT:    fcvtzs x11, s2
-; CHECK-CVT-NEXT:    cmp x10, x9
-; CHECK-CVT-NEXT:    csel x7, x10, x9, gt
-; CHECK-CVT-NEXT:    cmp x12, x8
-; CHECK-CVT-NEXT:    csel x10, x12, x8, lt
-; CHECK-CVT-NEXT:    fcvtzs x12, s1
-; CHECK-CVT-NEXT:    cmp x10, x9
-; CHECK-CVT-NEXT:    csel x0, x10, x9, gt
-; CHECK-CVT-NEXT:    cmp x11, x8
-; CHECK-CVT-NEXT:    csel x10, x11, x8, lt
-; CHECK-CVT-NEXT:    fcvtzs x11, s0
-; CHECK-CVT-NEXT:    cmp x10, x9
-; CHECK-CVT-NEXT:    csel x1, x10, x9, gt
-; CHECK-CVT-NEXT:    cmp x12, x8
-; CHECK-CVT-NEXT:    csel x10, x12, x8, lt
-; CHECK-CVT-NEXT:    cmp x10, x9
-; CHECK-CVT-NEXT:    csel x2, x10, x9, gt
-; CHECK-CVT-NEXT:    cmp x11, x8
-; CHECK-CVT-NEXT:    csel x8, x11, x8, lt
-; CHECK-CVT-NEXT:    cmp x8, x9
-; CHECK-CVT-NEXT:    csel x3, x8, x9, gt
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_v8f16_v8i50:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-FP16-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
-; CHECK-FP16-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
-; CHECK-FP16-NEXT:    mov h2, v1.h[1]
-; CHECK-FP16-NEXT:    fcvtzs x10, h1
-; CHECK-FP16-NEXT:    mov h3, v1.h[2]
-; CHECK-FP16-NEXT:    mov h1, v1.h[3]
-; CHECK-FP16-NEXT:    fcvtzs x11, h2
-; CHECK-FP16-NEXT:    cmp x10, x8
-; CHECK-FP16-NEXT:    fcvtzs x12, h3
-; CHECK-FP16-NEXT:    csel x10, x10, x8, lt
-; CHECK-FP16-NEXT:    mov h2, v0.h[2]
-; CHECK-FP16-NEXT:    cmp x10, x9
-; CHECK-FP16-NEXT:    csel x4, x10, x9, gt
-; CHECK-FP16-NEXT:    cmp x11, x8
-; CHECK-FP16-NEXT:    csel x10, x11, x8, lt
-; CHECK-FP16-NEXT:    fcvtzs x11, h1
-; CHECK-FP16-NEXT:    mov h1, v0.h[1]
-; CHECK-FP16-NEXT:    cmp x10, x9
-; CHECK-FP16-NEXT:    csel x5, x10, x9, gt
-; CHECK-FP16-NEXT:    cmp x12, x8
-; CHECK-FP16-NEXT:    csel x10, x12, x8, lt
-; CHECK-FP16-NEXT:    fcvtzs x12, h0
-; CHECK-FP16-NEXT:    mov h0, v0.h[3]
-; CHECK-FP16-NEXT:    cmp x10, x9
-; CHECK-FP16-NEXT:    csel x6, x10, x9, gt
-; CHECK-FP16-NEXT:    cmp x11, x8
-; CHECK-FP16-NEXT:    csel x10, x11, x8, lt
-; CHECK-FP16-NEXT:    fcvtzs x11, h1
-; CHECK-FP16-NEXT:    cmp x10, x9
-; CHECK-FP16-NEXT:    csel x7, x10, x9, gt
-; CHECK-FP16-NEXT:    cmp x12, x8
-; CHECK-FP16-NEXT:    csel x10, x12, x8, lt
-; CHECK-FP16-NEXT:    fcvtzs x12, h2
-; CHECK-FP16-NEXT:    cmp x10, x9
-; CHECK-FP16-NEXT:    csel x0, x10, x9, gt
-; CHECK-FP16-NEXT:    cmp x11, x8
-; CHECK-FP16-NEXT:    csel x10, x11, x8, lt
-; CHECK-FP16-NEXT:    fcvtzs x11, h0
-; CHECK-FP16-NEXT:    cmp x10, x9
-; CHECK-FP16-NEXT:    csel x1, x10, x9, gt
-; CHECK-FP16-NEXT:    cmp x12, x8
-; CHECK-FP16-NEXT:    csel x10, x12, x8, lt
-; CHECK-FP16-NEXT:    cmp x10, x9
-; CHECK-FP16-NEXT:    csel x2, x10, x9, gt
-; CHECK-FP16-NEXT:    cmp x11, x8
-; CHECK-FP16-NEXT:    csel x8, x11, x8, lt
-; CHECK-FP16-NEXT:    cmp x8, x9
-; CHECK-FP16-NEXT:    csel x3, x8, x9, gt
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_v8f16_v8i50:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-CVT-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
+; CHECK-SD-CVT-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
+; CHECK-SD-CVT-NEXT:    mov h2, v1.h[1]
+; CHECK-SD-CVT-NEXT:    fcvt s3, h1
+; CHECK-SD-CVT-NEXT:    mov h4, v1.h[2]
+; CHECK-SD-CVT-NEXT:    mov h1, v1.h[3]
+; CHECK-SD-CVT-NEXT:    fcvt s2, h2
+; CHECK-SD-CVT-NEXT:    fcvtzs x10, s3
+; CHECK-SD-CVT-NEXT:    fcvt s3, h4
+; CHECK-SD-CVT-NEXT:    fcvt s1, h1
+; CHECK-SD-CVT-NEXT:    fcvtzs x11, s2
+; CHECK-SD-CVT-NEXT:    cmp x10, x8
+; CHECK-SD-CVT-NEXT:    fcvtzs x12, s3
+; CHECK-SD-CVT-NEXT:    csel x10, x10, x8, lt
+; CHECK-SD-CVT-NEXT:    mov h2, v0.h[1]
+; CHECK-SD-CVT-NEXT:    fcvt s3, h0
+; CHECK-SD-CVT-NEXT:    cmp x10, x9
+; CHECK-SD-CVT-NEXT:    csel x4, x10, x9, gt
+; CHECK-SD-CVT-NEXT:    cmp x11, x8
+; CHECK-SD-CVT-NEXT:    csel x10, x11, x8, lt
+; CHECK-SD-CVT-NEXT:    fcvtzs x11, s1
+; CHECK-SD-CVT-NEXT:    mov h1, v0.h[2]
+; CHECK-SD-CVT-NEXT:    cmp x10, x9
+; CHECK-SD-CVT-NEXT:    fcvt s2, h2
+; CHECK-SD-CVT-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-CVT-NEXT:    csel x5, x10, x9, gt
+; CHECK-SD-CVT-NEXT:    cmp x12, x8
+; CHECK-SD-CVT-NEXT:    csel x10, x12, x8, lt
+; CHECK-SD-CVT-NEXT:    fcvtzs x12, s3
+; CHECK-SD-CVT-NEXT:    cmp x10, x9
+; CHECK-SD-CVT-NEXT:    fcvt s1, h1
+; CHECK-SD-CVT-NEXT:    csel x6, x10, x9, gt
+; CHECK-SD-CVT-NEXT:    cmp x11, x8
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    csel x10, x11, x8, lt
+; CHECK-SD-CVT-NEXT:    fcvtzs x11, s2
+; CHECK-SD-CVT-NEXT:    cmp x10, x9
+; CHECK-SD-CVT-NEXT:    csel x7, x10, x9, gt
+; CHECK-SD-CVT-NEXT:    cmp x12, x8
+; CHECK-SD-CVT-NEXT:    csel x10, x12, x8, lt
+; CHECK-SD-CVT-NEXT:    fcvtzs x12, s1
+; CHECK-SD-CVT-NEXT:    cmp x10, x9
+; CHECK-SD-CVT-NEXT:    csel x0, x10, x9, gt
+; CHECK-SD-CVT-NEXT:    cmp x11, x8
+; CHECK-SD-CVT-NEXT:    csel x10, x11, x8, lt
+; CHECK-SD-CVT-NEXT:    fcvtzs x11, s0
+; CHECK-SD-CVT-NEXT:    cmp x10, x9
+; CHECK-SD-CVT-NEXT:    csel x1, x10, x9, gt
+; CHECK-SD-CVT-NEXT:    cmp x12, x8
+; CHECK-SD-CVT-NEXT:    csel x10, x12, x8, lt
+; CHECK-SD-CVT-NEXT:    cmp x10, x9
+; CHECK-SD-CVT-NEXT:    csel x2, x10, x9, gt
+; CHECK-SD-CVT-NEXT:    cmp x11, x8
+; CHECK-SD-CVT-NEXT:    csel x8, x11, x8, lt
+; CHECK-SD-CVT-NEXT:    cmp x8, x9
+; CHECK-SD-CVT-NEXT:    csel x3, x8, x9, gt
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_v8f16_v8i50:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-FP16-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
+; CHECK-SD-FP16-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
+; CHECK-SD-FP16-NEXT:    mov h2, v1.h[1]
+; CHECK-SD-FP16-NEXT:    fcvtzs x10, h1
+; CHECK-SD-FP16-NEXT:    mov h3, v1.h[2]
+; CHECK-SD-FP16-NEXT:    mov h1, v1.h[3]
+; CHECK-SD-FP16-NEXT:    fcvtzs x11, h2
+; CHECK-SD-FP16-NEXT:    cmp x10, x8
+; CHECK-SD-FP16-NEXT:    fcvtzs x12, h3
+; CHECK-SD-FP16-NEXT:    csel x10, x10, x8, lt
+; CHECK-SD-FP16-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-FP16-NEXT:    cmp x10, x9
+; CHECK-SD-FP16-NEXT:    csel x4, x10, x9, gt
+; CHECK-SD-FP16-NEXT:    cmp x11, x8
+; CHECK-SD-FP16-NEXT:    csel x10, x11, x8, lt
+; CHECK-SD-FP16-NEXT:    fcvtzs x11, h1
+; CHECK-SD-FP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-FP16-NEXT:    cmp x10, x9
+; CHECK-SD-FP16-NEXT:    csel x5, x10, x9, gt
+; CHECK-SD-FP16-NEXT:    cmp x12, x8
+; CHECK-SD-FP16-NEXT:    csel x10, x12, x8, lt
+; CHECK-SD-FP16-NEXT:    fcvtzs x12, h0
+; CHECK-SD-FP16-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-FP16-NEXT:    cmp x10, x9
+; CHECK-SD-FP16-NEXT:    csel x6, x10, x9, gt
+; CHECK-SD-FP16-NEXT:    cmp x11, x8
+; CHECK-SD-FP16-NEXT:    csel x10, x11, x8, lt
+; CHECK-SD-FP16-NEXT:    fcvtzs x11, h1
+; CHECK-SD-FP16-NEXT:    cmp x10, x9
+; CHECK-SD-FP16-NEXT:    csel x7, x10, x9, gt
+; CHECK-SD-FP16-NEXT:    cmp x12, x8
+; CHECK-SD-FP16-NEXT:    csel x10, x12, x8, lt
+; CHECK-SD-FP16-NEXT:    fcvtzs x12, h2
+; CHECK-SD-FP16-NEXT:    cmp x10, x9
+; CHECK-SD-FP16-NEXT:    csel x0, x10, x9, gt
+; CHECK-SD-FP16-NEXT:    cmp x11, x8
+; CHECK-SD-FP16-NEXT:    csel x10, x11, x8, lt
+; CHECK-SD-FP16-NEXT:    fcvtzs x11, h0
+; CHECK-SD-FP16-NEXT:    cmp x10, x9
+; CHECK-SD-FP16-NEXT:    csel x1, x10, x9, gt
+; CHECK-SD-FP16-NEXT:    cmp x12, x8
+; CHECK-SD-FP16-NEXT:    csel x10, x12, x8, lt
+; CHECK-SD-FP16-NEXT:    cmp x10, x9
+; CHECK-SD-FP16-NEXT:    csel x2, x10, x9, gt
+; CHECK-SD-FP16-NEXT:    cmp x11, x8
+; CHECK-SD-FP16-NEXT:    csel x8, x11, x8, lt
+; CHECK-SD-FP16-NEXT:    cmp x8, x9
+; CHECK-SD-FP16-NEXT:    csel x3, x8, x9, gt
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v8f16_v8i50:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-GI-CVT-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
+; CHECK-GI-CVT-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
+; CHECK-GI-CVT-NEXT:    mov h2, v1.h[1]
+; CHECK-GI-CVT-NEXT:    fcvt s3, h1
+; CHECK-GI-CVT-NEXT:    mov h4, v1.h[2]
+; CHECK-GI-CVT-NEXT:    mov h1, v1.h[3]
+; CHECK-GI-CVT-NEXT:    fcvt s2, h2
+; CHECK-GI-CVT-NEXT:    fcvtzs x10, s3
+; CHECK-GI-CVT-NEXT:    fcvt s3, h4
+; CHECK-GI-CVT-NEXT:    fcvt s1, h1
+; CHECK-GI-CVT-NEXT:    fcvtzs x11, s2
+; CHECK-GI-CVT-NEXT:    cmp x10, x8
+; CHECK-GI-CVT-NEXT:    fcvtzs x12, s3
+; CHECK-GI-CVT-NEXT:    csel x10, x10, x8, lt
+; CHECK-GI-CVT-NEXT:    mov h2, v0.h[1]
+; CHECK-GI-CVT-NEXT:    fcvt s3, h0
+; CHECK-GI-CVT-NEXT:    cmp x10, x9
+; CHECK-GI-CVT-NEXT:    csel x4, x10, x9, gt
+; CHECK-GI-CVT-NEXT:    cmp x11, x8
+; CHECK-GI-CVT-NEXT:    csel x10, x11, x8, lt
+; CHECK-GI-CVT-NEXT:    fcvtzs x11, s1
+; CHECK-GI-CVT-NEXT:    mov h1, v0.h[2]
+; CHECK-GI-CVT-NEXT:    cmp x10, x9
+; CHECK-GI-CVT-NEXT:    fcvt s2, h2
+; CHECK-GI-CVT-NEXT:    mov h0, v0.h[3]
+; CHECK-GI-CVT-NEXT:    csel x5, x10, x9, gt
+; CHECK-GI-CVT-NEXT:    cmp x12, x8
+; CHECK-GI-CVT-NEXT:    csel x10, x12, x8, lt
+; CHECK-GI-CVT-NEXT:    fcvtzs x12, s3
+; CHECK-GI-CVT-NEXT:    cmp x10, x9
+; CHECK-GI-CVT-NEXT:    fcvt s1, h1
+; CHECK-GI-CVT-NEXT:    csel x6, x10, x9, gt
+; CHECK-GI-CVT-NEXT:    cmp x11, x8
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    csel x10, x11, x8, lt
+; CHECK-GI-CVT-NEXT:    fcvtzs x11, s2
+; CHECK-GI-CVT-NEXT:    cmp x10, x9
+; CHECK-GI-CVT-NEXT:    csel x7, x10, x9, gt
+; CHECK-GI-CVT-NEXT:    cmp x12, x8
+; CHECK-GI-CVT-NEXT:    csel x10, x12, x8, lt
+; CHECK-GI-CVT-NEXT:    fcvtzs x12, s1
+; CHECK-GI-CVT-NEXT:    cmp x10, x9
+; CHECK-GI-CVT-NEXT:    csel x0, x10, x9, gt
+; CHECK-GI-CVT-NEXT:    cmp x11, x8
+; CHECK-GI-CVT-NEXT:    csel x10, x11, x8, lt
+; CHECK-GI-CVT-NEXT:    fcvtzs x11, s0
+; CHECK-GI-CVT-NEXT:    cmp x10, x9
+; CHECK-GI-CVT-NEXT:    csel x1, x10, x9, gt
+; CHECK-GI-CVT-NEXT:    cmp x12, x8
+; CHECK-GI-CVT-NEXT:    csel x10, x12, x8, lt
+; CHECK-GI-CVT-NEXT:    cmp x10, x9
+; CHECK-GI-CVT-NEXT:    csel x2, x10, x9, gt
+; CHECK-GI-CVT-NEXT:    cmp x11, x8
+; CHECK-GI-CVT-NEXT:    csel x8, x11, x8, lt
+; CHECK-GI-CVT-NEXT:    cmp x8, x9
+; CHECK-GI-CVT-NEXT:    csel x3, x8, x9, gt
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v8f16_v8i50:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-GI-FP16-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
+; CHECK-GI-FP16-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
+; CHECK-GI-FP16-NEXT:    mov h2, v1.h[1]
+; CHECK-GI-FP16-NEXT:    fcvtzs x10, h1
+; CHECK-GI-FP16-NEXT:    mov h3, v1.h[2]
+; CHECK-GI-FP16-NEXT:    mov h1, v1.h[3]
+; CHECK-GI-FP16-NEXT:    fcvtzs x11, h2
+; CHECK-GI-FP16-NEXT:    cmp x10, x8
+; CHECK-GI-FP16-NEXT:    fcvtzs x12, h3
+; CHECK-GI-FP16-NEXT:    csel x10, x10, x8, lt
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT:    cmp x10, x9
+; CHECK-GI-FP16-NEXT:    csel x4, x10, x9, gt
+; CHECK-GI-FP16-NEXT:    cmp x11, x8
+; CHECK-GI-FP16-NEXT:    csel x10, x11, x8, lt
+; CHECK-GI-FP16-NEXT:    fcvtzs x11, h1
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT:    cmp x10, x9
+; CHECK-GI-FP16-NEXT:    csel x5, x10, x9, gt
+; CHECK-GI-FP16-NEXT:    cmp x12, x8
+; CHECK-GI-FP16-NEXT:    csel x10, x12, x8, lt
+; CHECK-GI-FP16-NEXT:    fcvtzs x12, h0
+; CHECK-GI-FP16-NEXT:    mov h0, v0.h[3]
+; CHECK-GI-FP16-NEXT:    cmp x10, x9
+; CHECK-GI-FP16-NEXT:    csel x6, x10, x9, gt
+; CHECK-GI-FP16-NEXT:    cmp x11, x8
+; CHECK-GI-FP16-NEXT:    csel x10, x11, x8, lt
+; CHECK-GI-FP16-NEXT:    fcvtzs x11, h1
+; CHECK-GI-FP16-NEXT:    cmp x10, x9
+; CHECK-GI-FP16-NEXT:    csel x7, x10, x9, gt
+; CHECK-GI-FP16-NEXT:    cmp x12, x8
+; CHECK-GI-FP16-NEXT:    csel x10, x12, x8, lt
+; CHECK-GI-FP16-NEXT:    fcvtzs x12, h2
+; CHECK-GI-FP16-NEXT:    cmp x10, x9
+; CHECK-GI-FP16-NEXT:    csel x0, x10, x9, gt
+; CHECK-GI-FP16-NEXT:    cmp x11, x8
+; CHECK-GI-FP16-NEXT:    csel x10, x11, x8, lt
+; CHECK-GI-FP16-NEXT:    fcvtzs x11, h0
+; CHECK-GI-FP16-NEXT:    cmp x10, x9
+; CHECK-GI-FP16-NEXT:    csel x1, x10, x9, gt
+; CHECK-GI-FP16-NEXT:    cmp x12, x8
+; CHECK-GI-FP16-NEXT:    csel x10, x12, x8, lt
+; CHECK-GI-FP16-NEXT:    cmp x10, x9
+; CHECK-GI-FP16-NEXT:    csel x2, x10, x9, gt
+; CHECK-GI-FP16-NEXT:    cmp x11, x8
+; CHECK-GI-FP16-NEXT:    csel x8, x11, x8, lt
+; CHECK-GI-FP16-NEXT:    cmp x8, x9
+; CHECK-GI-FP16-NEXT:    csel x3, x8, x9, gt
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <8 x i50> @llvm.fptosi.sat.v8f16.v8i50(<8 x half> %f)
     ret <8 x i50> %x
 }
 
 define <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v8f16_v8i64:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-CVT-NEXT:    mov h4, v0.h[2]
-; CHECK-CVT-NEXT:    mov h3, v0.h[1]
-; CHECK-CVT-NEXT:    mov h7, v0.h[3]
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov h2, v1.h[2]
-; CHECK-CVT-NEXT:    mov h5, v1.h[1]
-; CHECK-CVT-NEXT:    mov h6, v1.h[3]
-; CHECK-CVT-NEXT:    fcvt s1, h1
-; CHECK-CVT-NEXT:    fcvt s4, h4
-; CHECK-CVT-NEXT:    fcvt s3, h3
-; CHECK-CVT-NEXT:    fcvt s7, h7
-; CHECK-CVT-NEXT:    fcvtzs x9, s0
-; CHECK-CVT-NEXT:    fcvt s2, h2
-; CHECK-CVT-NEXT:    fcvt s5, h5
-; CHECK-CVT-NEXT:    fcvt s6, h6
-; CHECK-CVT-NEXT:    fcvtzs x8, s1
-; CHECK-CVT-NEXT:    fcvtzs x12, s4
-; CHECK-CVT-NEXT:    fcvtzs x11, s3
-; CHECK-CVT-NEXT:    fcvtzs x15, s7
-; CHECK-CVT-NEXT:    fmov d0, x9
-; CHECK-CVT-NEXT:    fcvtzs x10, s2
-; CHECK-CVT-NEXT:    fcvtzs x13, s5
-; CHECK-CVT-NEXT:    fcvtzs x14, s6
-; CHECK-CVT-NEXT:    fmov d2, x8
-; CHECK-CVT-NEXT:    fmov d1, x12
-; CHECK-CVT-NEXT:    mov v0.d[1], x11
-; CHECK-CVT-NEXT:    fmov d3, x10
-; CHECK-CVT-NEXT:    mov v2.d[1], x13
-; CHECK-CVT-NEXT:    mov v1.d[1], x15
-; CHECK-CVT-NEXT:    mov v3.d[1], x14
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_v8f16_v8i64:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-FP16-NEXT:    mov h4, v0.h[2]
-; CHECK-FP16-NEXT:    mov h3, v0.h[1]
-; CHECK-FP16-NEXT:    mov h7, v0.h[3]
-; CHECK-FP16-NEXT:    fcvtzs x9, h0
-; CHECK-FP16-NEXT:    mov h2, v1.h[2]
-; CHECK-FP16-NEXT:    mov h5, v1.h[1]
-; CHECK-FP16-NEXT:    mov h6, v1.h[3]
-; CHECK-FP16-NEXT:    fcvtzs x8, h1
-; CHECK-FP16-NEXT:    fcvtzs x12, h4
-; CHECK-FP16-NEXT:    fcvtzs x11, h3
-; CHECK-FP16-NEXT:    fcvtzs x15, h7
-; CHECK-FP16-NEXT:    fmov d0, x9
-; CHECK-FP16-NEXT:    fcvtzs x10, h2
-; CHECK-FP16-NEXT:    fcvtzs x13, h5
-; CHECK-FP16-NEXT:    fcvtzs x14, h6
-; CHECK-FP16-NEXT:    fmov d2, x8
-; CHECK-FP16-NEXT:    fmov d1, x12
-; CHECK-FP16-NEXT:    mov v0.d[1], x11
-; CHECK-FP16-NEXT:    fmov d3, x10
-; CHECK-FP16-NEXT:    mov v2.d[1], x13
-; CHECK-FP16-NEXT:    mov v1.d[1], x15
-; CHECK-FP16-NEXT:    mov v3.d[1], x14
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_v8f16_v8i64:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-CVT-NEXT:    mov h4, v0.h[2]
+; CHECK-SD-CVT-NEXT:    mov h3, v0.h[1]
+; CHECK-SD-CVT-NEXT:    mov h7, v0.h[3]
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov h2, v1.h[2]
+; CHECK-SD-CVT-NEXT:    mov h5, v1.h[1]
+; CHECK-SD-CVT-NEXT:    mov h6, v1.h[3]
+; CHECK-SD-CVT-NEXT:    fcvt s1, h1
+; CHECK-SD-CVT-NEXT:    fcvt s4, h4
+; CHECK-SD-CVT-NEXT:    fcvt s3, h3
+; CHECK-SD-CVT-NEXT:    fcvt s7, h7
+; CHECK-SD-CVT-NEXT:    fcvtzs x9, s0
+; CHECK-SD-CVT-NEXT:    fcvt s2, h2
+; CHECK-SD-CVT-NEXT:    fcvt s5, h5
+; CHECK-SD-CVT-NEXT:    fcvt s6, h6
+; CHECK-SD-CVT-NEXT:    fcvtzs x8, s1
+; CHECK-SD-CVT-NEXT:    fcvtzs x12, s4
+; CHECK-SD-CVT-NEXT:    fcvtzs x11, s3
+; CHECK-SD-CVT-NEXT:    fcvtzs x15, s7
+; CHECK-SD-CVT-NEXT:    fmov d0, x9
+; CHECK-SD-CVT-NEXT:    fcvtzs x10, s2
+; CHECK-SD-CVT-NEXT:    fcvtzs x13, s5
+; CHECK-SD-CVT-NEXT:    fcvtzs x14, s6
+; CHECK-SD-CVT-NEXT:    fmov d2, x8
+; CHECK-SD-CVT-NEXT:    fmov d1, x12
+; CHECK-SD-CVT-NEXT:    mov v0.d[1], x11
+; CHECK-SD-CVT-NEXT:    fmov d3, x10
+; CHECK-SD-CVT-NEXT:    mov v2.d[1], x13
+; CHECK-SD-CVT-NEXT:    mov v1.d[1], x15
+; CHECK-SD-CVT-NEXT:    mov v3.d[1], x14
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_v8f16_v8i64:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-FP16-NEXT:    mov h4, v0.h[2]
+; CHECK-SD-FP16-NEXT:    mov h3, v0.h[1]
+; CHECK-SD-FP16-NEXT:    mov h7, v0.h[3]
+; CHECK-SD-FP16-NEXT:    fcvtzs x9, h0
+; CHECK-SD-FP16-NEXT:    mov h2, v1.h[2]
+; CHECK-SD-FP16-NEXT:    mov h5, v1.h[1]
+; CHECK-SD-FP16-NEXT:    mov h6, v1.h[3]
+; CHECK-SD-FP16-NEXT:    fcvtzs x8, h1
+; CHECK-SD-FP16-NEXT:    fcvtzs x12, h4
+; CHECK-SD-FP16-NEXT:    fcvtzs x11, h3
+; CHECK-SD-FP16-NEXT:    fcvtzs x15, h7
+; CHECK-SD-FP16-NEXT:    fmov d0, x9
+; CHECK-SD-FP16-NEXT:    fcvtzs x10, h2
+; CHECK-SD-FP16-NEXT:    fcvtzs x13, h5
+; CHECK-SD-FP16-NEXT:    fcvtzs x14, h6
+; CHECK-SD-FP16-NEXT:    fmov d2, x8
+; CHECK-SD-FP16-NEXT:    fmov d1, x12
+; CHECK-SD-FP16-NEXT:    mov v0.d[1], x11
+; CHECK-SD-FP16-NEXT:    fmov d3, x10
+; CHECK-SD-FP16-NEXT:    mov v2.d[1], x13
+; CHECK-SD-FP16-NEXT:    mov v1.d[1], x15
+; CHECK-SD-FP16-NEXT:    mov v3.d[1], x14
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v8f16_v8i64:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-CVT-NEXT:    fcvtl v2.2d, v1.2s
+; CHECK-GI-CVT-NEXT:    fcvtl2 v1.2d, v1.4s
+; CHECK-GI-CVT-NEXT:    fcvtl v3.2d, v0.2s
+; CHECK-GI-CVT-NEXT:    fcvtl2 v4.2d, v0.4s
+; CHECK-GI-CVT-NEXT:    fcvtzs v0.2d, v2.2d
+; CHECK-GI-CVT-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-CVT-NEXT:    fcvtzs v2.2d, v3.2d
+; CHECK-GI-CVT-NEXT:    fcvtzs v3.2d, v4.2d
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v8f16_v8i64:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-FP16-NEXT:    mov s2, v0.s[2]
+; CHECK-GI-FP16-NEXT:    mov s3, v0.s[3]
+; CHECK-GI-FP16-NEXT:    mov h4, v0.h[1]
+; CHECK-GI-FP16-NEXT:    fcvt d0, h0
+; CHECK-GI-FP16-NEXT:    mov h5, v1.h[1]
+; CHECK-GI-FP16-NEXT:    mov h6, v2.h[1]
+; CHECK-GI-FP16-NEXT:    mov h7, v3.h[1]
+; CHECK-GI-FP16-NEXT:    fcvt d4, h4
+; CHECK-GI-FP16-NEXT:    fcvt d1, h1
+; CHECK-GI-FP16-NEXT:    fcvt d2, h2
+; CHECK-GI-FP16-NEXT:    fcvt d3, h3
+; CHECK-GI-FP16-NEXT:    fcvt d5, h5
+; CHECK-GI-FP16-NEXT:    fcvt d6, h6
+; CHECK-GI-FP16-NEXT:    fcvt d7, h7
+; CHECK-GI-FP16-NEXT:    mov v0.d[1], v4.d[0]
+; CHECK-GI-FP16-NEXT:    mov v1.d[1], v5.d[0]
+; CHECK-GI-FP16-NEXT:    mov v2.d[1], v6.d[0]
+; CHECK-GI-FP16-NEXT:    mov v3.d[1], v7.d[0]
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <8 x i64> @llvm.fptosi.sat.v8f16.v8i64(<8 x half> %f)
     ret <8 x i64> %x
 }
 
 define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
-; CHECK-LABEL: test_signed_v8f16_v8i100:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #192
-; CHECK-NEXT:    str d10, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x29, x30, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x28, x27, [sp, #112] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x26, x25, [sp, #128] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x24, x23, [sp, #144] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #160] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #176] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 192
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w22, -32
-; CHECK-NEXT:    .cfi_offset w23, -40
-; CHECK-NEXT:    .cfi_offset w24, -48
-; CHECK-NEXT:    .cfi_offset w25, -56
-; CHECK-NEXT:    .cfi_offset w26, -64
-; CHECK-NEXT:    .cfi_offset w27, -72
-; CHECK-NEXT:    .cfi_offset w28, -80
-; CHECK-NEXT:    .cfi_offset w30, -88
-; CHECK-NEXT:    .cfi_offset w29, -96
-; CHECK-NEXT:    .cfi_offset b8, -104
-; CHECK-NEXT:    .cfi_offset b9, -112
-; CHECK-NEXT:    .cfi_offset b10, -128
-; CHECK-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    mov x19, x8
-; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    mov h0, v0.h[1]
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    movi v10.2s, #241, lsl #24
-; CHECK-NEXT:    mov w8, #1895825407 // =0x70ffffff
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    fmov s9, w8
-; CHECK-NEXT:    mov x22, #-34359738368 // =0xfffffff800000000
-; CHECK-NEXT:    mov x23, #34359738367 // =0x7ffffffff
-; CHECK-NEXT:    mov h0, v0.h[3]
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    csel x8, x22, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    csel x8, x23, x8, gt
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x8, xzr, x8, vs
-; CHECK-NEXT:    str x8, [sp, #72] // 8-byte Folded Spill
-; CHECK-NEXT:    csel x8, xzr, x9, vs
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x22, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    csel x9, x23, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x10, xzr, x8, vs
-; CHECK-NEXT:    csel x8, xzr, x9, vs
-; CHECK-NEXT:    stp x8, x10, [sp, #8] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    mov h0, v0.h[2]
-; CHECK-NEXT:    csel x8, x22, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    csel x8, x23, x8, gt
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x26, xzr, x8, vs
-; CHECK-NEXT:    csel x8, xzr, x9, vs
-; CHECK-NEXT:    str x8, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    mov h0, v0.h[1]
-; CHECK-NEXT:    csel x8, x22, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    csel x8, x23, x8, gt
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x27, xzr, x8, vs
-; CHECK-NEXT:    csel x8, xzr, x9, vs
-; CHECK-NEXT:    str x8, [sp] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    mov h0, v0.h[3]
-; CHECK-NEXT:    csel x8, x22, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    csel x8, x23, x8, gt
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x20, xzr, x8, vs
-; CHECK-NEXT:    csel x21, xzr, x9, vs
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x22, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    csel x9, x23, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x28, xzr, x8, vs
-; CHECK-NEXT:    csel x24, xzr, x9, vs
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    mov h0, v0.h[2]
-; CHECK-NEXT:    csel x8, x22, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    csel x8, x23, x8, gt
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x25, xzr, x8, vs
-; CHECK-NEXT:    csel x29, xzr, x9, vs
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    ldr x9, [sp] // 8-byte Folded Reload
-; CHECK-NEXT:    extr x8, x24, x28, #28
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    bfi x25, x21, #36, #28
-; CHECK-NEXT:    lsr x11, x20, #28
-; CHECK-NEXT:    stur x9, [x19, #75]
-; CHECK-NEXT:    extr x9, x20, x21, #28
-; CHECK-NEXT:    stur x8, [x19, #41]
-; CHECK-NEXT:    csel x8, x22, x1, lt
-; CHECK-NEXT:    str x9, [x19, #16]
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    ldr x10, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT:    stp x29, x25, [x19]
-; CHECK-NEXT:    stur x10, [x19, #50]
-; CHECK-NEXT:    lsr x10, x24, #28
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    csel x8, x23, x8, gt
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    strb w10, [x19, #49]
-; CHECK-NEXT:    ldp x14, x12, [sp, #8] // 16-byte Folded Reload
-; CHECK-NEXT:    strb w11, [x19, #24]
-; CHECK-NEXT:    csel x8, xzr, x8, vs
-; CHECK-NEXT:    ldr x13, [sp, #24] // 8-byte Folded Reload
-; CHECK-NEXT:    csel x9, xzr, x9, vs
-; CHECK-NEXT:    bfi x8, x28, #36, #28
-; CHECK-NEXT:    extr x10, x14, x12, #28
-; CHECK-NEXT:    bfi x27, x12, #36, #28
-; CHECK-NEXT:    ldr x12, [sp, #72] // 8-byte Folded Reload
-; CHECK-NEXT:    bfi x26, x13, #36, #28
-; CHECK-NEXT:    stur x9, [x19, #25]
-; CHECK-NEXT:    lsr x9, x14, #28
-; CHECK-NEXT:    extr x11, x12, x13, #28
-; CHECK-NEXT:    stur x8, [x19, #33]
-; CHECK-NEXT:    lsr x8, x12, #28
-; CHECK-NEXT:    stur x10, [x19, #91]
-; CHECK-NEXT:    stur x27, [x19, #83]
-; CHECK-NEXT:    stur x11, [x19, #66]
-; CHECK-NEXT:    stur x26, [x19, #58]
-; CHECK-NEXT:    strb w9, [x19, #99]
-; CHECK-NEXT:    strb w8, [x19, #74]
-; CHECK-NEXT:    ldp x20, x19, [sp, #176] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr d10, [sp, #64] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x22, x21, [sp, #160] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x24, x23, [sp, #144] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x26, x25, [sp, #128] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x28, x27, [sp, #112] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x29, x30, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    add sp, sp, #192
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v8f16_v8i100:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #192
+; CHECK-SD-NEXT:    str d10, [sp, #64] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x29, x30, [sp, #96] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x28, x27, [sp, #112] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x26, x25, [sp, #128] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x24, x23, [sp, #144] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #160] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #176] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 192
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w23, -40
+; CHECK-SD-NEXT:    .cfi_offset w24, -48
+; CHECK-SD-NEXT:    .cfi_offset w25, -56
+; CHECK-SD-NEXT:    .cfi_offset w26, -64
+; CHECK-SD-NEXT:    .cfi_offset w27, -72
+; CHECK-SD-NEXT:    .cfi_offset w28, -80
+; CHECK-SD-NEXT:    .cfi_offset w30, -88
+; CHECK-SD-NEXT:    .cfi_offset w29, -96
+; CHECK-SD-NEXT:    .cfi_offset b8, -104
+; CHECK-SD-NEXT:    .cfi_offset b9, -112
+; CHECK-SD-NEXT:    .cfi_offset b10, -128
+; CHECK-SD-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    mov x19, x8
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    mov h0, v0.h[1]
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    movi v10.2s, #241, lsl #24
+; CHECK-SD-NEXT:    mov w8, #1895825407 // =0x70ffffff
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fmov s9, w8
+; CHECK-SD-NEXT:    mov x22, #-34359738368 // =0xfffffff800000000
+; CHECK-SD-NEXT:    mov x23, #34359738367 // =0x7ffffffff
+; CHECK-SD-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    csel x8, x22, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    csel x8, x23, x8, gt
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x8, xzr, x8, vs
+; CHECK-SD-NEXT:    str x8, [sp, #72] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    csel x8, xzr, x9, vs
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x22, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    csel x9, x23, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x10, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x8, xzr, x9, vs
+; CHECK-SD-NEXT:    stp x8, x10, [sp, #8] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[2]
+; CHECK-SD-NEXT:    csel x8, x22, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    csel x8, x23, x8, gt
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x26, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x8, xzr, x9, vs
+; CHECK-SD-NEXT:    str x8, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[1]
+; CHECK-SD-NEXT:    csel x8, x22, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    csel x8, x23, x8, gt
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x27, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x8, xzr, x9, vs
+; CHECK-SD-NEXT:    str x8, [sp] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-NEXT:    csel x8, x22, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    csel x8, x23, x8, gt
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x20, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x21, xzr, x9, vs
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x22, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    csel x9, x23, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x28, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x24, xzr, x9, vs
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[2]
+; CHECK-SD-NEXT:    csel x8, x22, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    csel x8, x23, x8, gt
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x25, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x29, xzr, x9, vs
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    ldr x9, [sp] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    extr x8, x24, x28, #28
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    bfi x25, x21, #36, #28
+; CHECK-SD-NEXT:    lsr x11, x20, #28
+; CHECK-SD-NEXT:    stur x9, [x19, #75]
+; CHECK-SD-NEXT:    extr x9, x20, x21, #28
+; CHECK-SD-NEXT:    stur x8, [x19, #41]
+; CHECK-SD-NEXT:    csel x8, x22, x1, lt
+; CHECK-SD-NEXT:    str x9, [x19, #16]
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    ldr x10, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    stp x29, x25, [x19]
+; CHECK-SD-NEXT:    stur x10, [x19, #50]
+; CHECK-SD-NEXT:    lsr x10, x24, #28
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    csel x8, x23, x8, gt
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    strb w10, [x19, #49]
+; CHECK-SD-NEXT:    ldp x14, x12, [sp, #8] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    strb w11, [x19, #24]
+; CHECK-SD-NEXT:    csel x8, xzr, x8, vs
+; CHECK-SD-NEXT:    ldr x13, [sp, #24] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csel x9, xzr, x9, vs
+; CHECK-SD-NEXT:    bfi x8, x28, #36, #28
+; CHECK-SD-NEXT:    extr x10, x14, x12, #28
+; CHECK-SD-NEXT:    bfi x27, x12, #36, #28
+; CHECK-SD-NEXT:    ldr x12, [sp, #72] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    bfi x26, x13, #36, #28
+; CHECK-SD-NEXT:    stur x9, [x19, #25]
+; CHECK-SD-NEXT:    lsr x9, x14, #28
+; CHECK-SD-NEXT:    extr x11, x12, x13, #28
+; CHECK-SD-NEXT:    stur x8, [x19, #33]
+; CHECK-SD-NEXT:    lsr x8, x12, #28
+; CHECK-SD-NEXT:    stur x10, [x19, #91]
+; CHECK-SD-NEXT:    stur x27, [x19, #83]
+; CHECK-SD-NEXT:    stur x11, [x19, #66]
+; CHECK-SD-NEXT:    stur x26, [x19, #58]
+; CHECK-SD-NEXT:    strb w9, [x19, #99]
+; CHECK-SD-NEXT:    strb w8, [x19, #74]
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #176] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr d10, [sp, #64] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #160] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x24, x23, [sp, #144] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x26, x25, [sp, #128] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x28, x27, [sp, #112] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x29, x30, [sp, #96] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    add sp, sp, #192
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v8f16_v8i100:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-CVT-NEXT:    mov x11, x8
+; CHECK-GI-CVT-NEXT:    fcvt s3, h0
+; CHECK-GI-CVT-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-CVT-NEXT:    str wzr, [x8, #8]
+; CHECK-GI-CVT-NEXT:    strb wzr, [x8, #12]
+; CHECK-GI-CVT-NEXT:    fcvt s1, h1
+; CHECK-GI-CVT-NEXT:    fcvt s2, h2
+; CHECK-GI-CVT-NEXT:    fcvtzs x9, s3
+; CHECK-GI-CVT-NEXT:    fcvt s3, h4
+; CHECK-GI-CVT-NEXT:    fcvtzs x10, s1
+; CHECK-GI-CVT-NEXT:    mov h1, v0.h[4]
+; CHECK-GI-CVT-NEXT:    fcvtzs x12, s2
+; CHECK-GI-CVT-NEXT:    mov h2, v0.h[5]
+; CHECK-GI-CVT-NEXT:    str x9, [x8]
+; CHECK-GI-CVT-NEXT:    mov x9, x8
+; CHECK-GI-CVT-NEXT:    fcvt s1, h1
+; CHECK-GI-CVT-NEXT:    str x10, [x11, #12]!
+; CHECK-GI-CVT-NEXT:    fcvtzs x10, s3
+; CHECK-GI-CVT-NEXT:    mov h3, v0.h[6]
+; CHECK-GI-CVT-NEXT:    fcvt s2, h2
+; CHECK-GI-CVT-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-CVT-NEXT:    str wzr, [x11, #8]
+; CHECK-GI-CVT-NEXT:    strb wzr, [x11, #12]
+; CHECK-GI-CVT-NEXT:    mov x11, x8
+; CHECK-GI-CVT-NEXT:    str x12, [x9, #25]!
+; CHECK-GI-CVT-NEXT:    fcvtzs x12, s1
+; CHECK-GI-CVT-NEXT:    str wzr, [x9, #8]
+; CHECK-GI-CVT-NEXT:    fcvt s1, h3
+; CHECK-GI-CVT-NEXT:    strb wzr, [x9, #12]
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov x9, x8
+; CHECK-GI-CVT-NEXT:    str x10, [x11, #37]!
+; CHECK-GI-CVT-NEXT:    fcvtzs x10, s2
+; CHECK-GI-CVT-NEXT:    str wzr, [x11, #8]
+; CHECK-GI-CVT-NEXT:    strb wzr, [x11, #12]
+; CHECK-GI-CVT-NEXT:    fcvtzs x11, s1
+; CHECK-GI-CVT-NEXT:    str x12, [x9, #50]!
+; CHECK-GI-CVT-NEXT:    str wzr, [x9, #8]
+; CHECK-GI-CVT-NEXT:    strb wzr, [x9, #12]
+; CHECK-GI-CVT-NEXT:    mov x9, x8
+; CHECK-GI-CVT-NEXT:    str x10, [x9, #62]!
+; CHECK-GI-CVT-NEXT:    fcvtzs x10, s0
+; CHECK-GI-CVT-NEXT:    str wzr, [x9, #8]
+; CHECK-GI-CVT-NEXT:    strb wzr, [x9, #12]
+; CHECK-GI-CVT-NEXT:    mov x9, x8
+; CHECK-GI-CVT-NEXT:    str x11, [x9, #75]!
+; CHECK-GI-CVT-NEXT:    str wzr, [x9, #8]
+; CHECK-GI-CVT-NEXT:    strb wzr, [x9, #12]
+; CHECK-GI-CVT-NEXT:    str x10, [x8, #87]!
+; CHECK-GI-CVT-NEXT:    str wzr, [x8, #8]
+; CHECK-GI-CVT-NEXT:    strb wzr, [x8, #12]
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v8f16_v8i100:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT:    mov x11, x8
+; CHECK-GI-FP16-NEXT:    fcvtzs x9, h0
+; CHECK-GI-FP16-NEXT:    str wzr, [x8, #8]
+; CHECK-GI-FP16-NEXT:    strb wzr, [x8, #12]
+; CHECK-GI-FP16-NEXT:    fcvtzs x10, h1
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[3]
+; CHECK-GI-FP16-NEXT:    fcvtzs x12, h2
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[4]
+; CHECK-GI-FP16-NEXT:    str x9, [x8]
+; CHECK-GI-FP16-NEXT:    mov x9, x8
+; CHECK-GI-FP16-NEXT:    str x10, [x11, #12]!
+; CHECK-GI-FP16-NEXT:    fcvtzs x10, h1
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[5]
+; CHECK-GI-FP16-NEXT:    str wzr, [x11, #8]
+; CHECK-GI-FP16-NEXT:    strb wzr, [x11, #12]
+; CHECK-GI-FP16-NEXT:    mov x11, x8
+; CHECK-GI-FP16-NEXT:    str x12, [x9, #25]!
+; CHECK-GI-FP16-NEXT:    fcvtzs x12, h2
+; CHECK-GI-FP16-NEXT:    str wzr, [x9, #8]
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[6]
+; CHECK-GI-FP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-FP16-NEXT:    strb wzr, [x9, #12]
+; CHECK-GI-FP16-NEXT:    fcvtzs x9, h1
+; CHECK-GI-FP16-NEXT:    str x10, [x11, #37]!
+; CHECK-GI-FP16-NEXT:    mov x10, x8
+; CHECK-GI-FP16-NEXT:    str wzr, [x11, #8]
+; CHECK-GI-FP16-NEXT:    strb wzr, [x11, #12]
+; CHECK-GI-FP16-NEXT:    fcvtzs x11, h2
+; CHECK-GI-FP16-NEXT:    str x12, [x10, #50]!
+; CHECK-GI-FP16-NEXT:    str wzr, [x10, #8]
+; CHECK-GI-FP16-NEXT:    strb wzr, [x10, #12]
+; CHECK-GI-FP16-NEXT:    mov x10, x8
+; CHECK-GI-FP16-NEXT:    str x9, [x10, #62]!
+; CHECK-GI-FP16-NEXT:    fcvtzs x9, h0
+; CHECK-GI-FP16-NEXT:    str wzr, [x10, #8]
+; CHECK-GI-FP16-NEXT:    strb wzr, [x10, #12]
+; CHECK-GI-FP16-NEXT:    mov x10, x8
+; CHECK-GI-FP16-NEXT:    str x11, [x10, #75]!
+; CHECK-GI-FP16-NEXT:    str wzr, [x10, #8]
+; CHECK-GI-FP16-NEXT:    strb wzr, [x10, #12]
+; CHECK-GI-FP16-NEXT:    str x9, [x8, #87]!
+; CHECK-GI-FP16-NEXT:    str wzr, [x8, #8]
+; CHECK-GI-FP16-NEXT:    strb wzr, [x8, #12]
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <8 x i100> @llvm.fptosi.sat.v8f16.v8i100(<8 x half> %f)
     ret <8 x i100> %x
 }
@@ -2730,72 +4695,140 @@ declare <16 x i8> @llvm.fptosi.sat.v16f64.v16i8(<16 x double> %f)
 declare <16 x i16> @llvm.fptosi.sat.v16f64.v16i16(<16 x double> %f)
 
 define <8 x i8> @test_signed_v8f32_v8i8(<8 x float> %f) {
-; CHECK-LABEL: test_signed_v8f32_v8i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v2.4s, #127
-; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    smin v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    smin v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    mvni v2.4s, #127
-; CHECK-NEXT:    smax v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    smax v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    xtn v0.8b, v0.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v8f32_v8i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v2.4s, #127
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    smin v1.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT:    smin v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT:    mvni v2.4s, #127
+; CHECK-SD-NEXT:    smax v1.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT:    smax v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    xtn v0.8b, v0.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v8f32_v8i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v2.4s, #127
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-GI-NEXT:    smin v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT:    smin v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT:    mvni v2.4s, #127
+; CHECK-GI-NEXT:    smax v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT:    smax v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    xtn v0.8b, v0.8h
+; CHECK-GI-NEXT:    ret
     %x = call <8 x i8> @llvm.fptosi.sat.v8f32.v8i8(<8 x float> %f)
     ret <8 x i8> %x
 }
 
 define <16 x i8> @test_signed_v16f32_v16i8(<16 x float> %f) {
-; CHECK-LABEL: test_signed_v16f32_v16i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v4.4s, #127
-; CHECK-NEXT:    fcvtzs v3.4s, v3.4s
-; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    mvni v5.4s, #127
-; CHECK-NEXT:    smin v3.4s, v3.4s, v4.4s
-; CHECK-NEXT:    smin v2.4s, v2.4s, v4.4s
-; CHECK-NEXT:    smin v1.4s, v1.4s, v4.4s
-; CHECK-NEXT:    smin v0.4s, v0.4s, v4.4s
-; CHECK-NEXT:    smax v3.4s, v3.4s, v5.4s
-; CHECK-NEXT:    smax v2.4s, v2.4s, v5.4s
-; CHECK-NEXT:    smax v1.4s, v1.4s, v5.4s
-; CHECK-NEXT:    smax v0.4s, v0.4s, v5.4s
-; CHECK-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v16f32_v16i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v4.4s, #127
+; CHECK-SD-NEXT:    fcvtzs v3.4s, v3.4s
+; CHECK-SD-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    mvni v5.4s, #127
+; CHECK-SD-NEXT:    smin v3.4s, v3.4s, v4.4s
+; CHECK-SD-NEXT:    smin v2.4s, v2.4s, v4.4s
+; CHECK-SD-NEXT:    smin v1.4s, v1.4s, v4.4s
+; CHECK-SD-NEXT:    smin v0.4s, v0.4s, v4.4s
+; CHECK-SD-NEXT:    smax v3.4s, v3.4s, v5.4s
+; CHECK-SD-NEXT:    smax v2.4s, v2.4s, v5.4s
+; CHECK-SD-NEXT:    smax v1.4s, v1.4s, v5.4s
+; CHECK-SD-NEXT:    smax v0.4s, v0.4s, v5.4s
+; CHECK-SD-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v16f32_v16i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v4.4s, #127
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-GI-NEXT:    fcvtzs v3.4s, v3.4s
+; CHECK-GI-NEXT:    mvni v5.4s, #127
+; CHECK-GI-NEXT:    smin v0.4s, v0.4s, v4.4s
+; CHECK-GI-NEXT:    smin v1.4s, v1.4s, v4.4s
+; CHECK-GI-NEXT:    smin v2.4s, v2.4s, v4.4s
+; CHECK-GI-NEXT:    smin v3.4s, v3.4s, v4.4s
+; CHECK-GI-NEXT:    smax v0.4s, v0.4s, v5.4s
+; CHECK-GI-NEXT:    smax v1.4s, v1.4s, v5.4s
+; CHECK-GI-NEXT:    smax v2.4s, v2.4s, v5.4s
+; CHECK-GI-NEXT:    smax v3.4s, v3.4s, v5.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    ret
     %x = call <16 x i8> @llvm.fptosi.sat.v16f32.v16i8(<16 x float> %f)
     ret <16 x i8> %x
 }
 
 define <8 x i16> @test_signed_v8f32_v8i16(<8 x float> %f) {
-; CHECK-LABEL: test_signed_v8f32_v8i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NEXT:    sqxtn v0.4h, v0.4s
-; CHECK-NEXT:    sqxtn2 v0.8h, v1.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v8f32_v8i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-NEXT:    sqxtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    sqxtn2 v0.8h, v1.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v8f32_v8i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v2.4s, #127, msl #8
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-GI-NEXT:    smin v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT:    smin v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT:    mvni v2.4s, #127, msl #8
+; CHECK-GI-NEXT:    smax v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT:    smax v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
     %x = call <8 x i16> @llvm.fptosi.sat.v8f32.v8i16(<8 x float> %f)
     ret <8 x i16> %x
 }
 
 define <16 x i16> @test_signed_v16f32_v16i16(<16 x float> %f) {
-; CHECK-LABEL: test_signed_v16f32_v16i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-NEXT:    fcvtzs v4.4s, v1.4s
-; CHECK-NEXT:    sqxtn v0.4h, v0.4s
-; CHECK-NEXT:    sqxtn v1.4h, v2.4s
-; CHECK-NEXT:    fcvtzs v2.4s, v3.4s
-; CHECK-NEXT:    sqxtn2 v0.8h, v4.4s
-; CHECK-NEXT:    sqxtn2 v1.8h, v2.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v16f32_v16i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-SD-NEXT:    fcvtzs v4.4s, v1.4s
+; CHECK-SD-NEXT:    sqxtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    sqxtn v1.4h, v2.4s
+; CHECK-SD-NEXT:    fcvtzs v2.4s, v3.4s
+; CHECK-SD-NEXT:    sqxtn2 v0.8h, v4.4s
+; CHECK-SD-NEXT:    sqxtn2 v1.8h, v2.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v16f32_v16i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v4.4s, #127, msl #8
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-GI-NEXT:    fcvtzs v3.4s, v3.4s
+; CHECK-GI-NEXT:    mvni v5.4s, #127, msl #8
+; CHECK-GI-NEXT:    smin v0.4s, v0.4s, v4.4s
+; CHECK-GI-NEXT:    smin v1.4s, v1.4s, v4.4s
+; CHECK-GI-NEXT:    smin v2.4s, v2.4s, v4.4s
+; CHECK-GI-NEXT:    smin v3.4s, v3.4s, v4.4s
+; CHECK-GI-NEXT:    smax v0.4s, v0.4s, v5.4s
+; CHECK-GI-NEXT:    smax v1.4s, v1.4s, v5.4s
+; CHECK-GI-NEXT:    smax v2.4s, v2.4s, v5.4s
+; CHECK-GI-NEXT:    smax v3.4s, v3.4s, v5.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    ret
     %x = call <16 x i16> @llvm.fptosi.sat.v16f32.v16i16(<16 x float> %f)
     ret <16 x i16> %x
 }
@@ -2803,449 +4836,689 @@ define <16 x i16> @test_signed_v16f32_v16i16(<16 x float> %f) {
 
 
 define <16 x i8> @test_signed_v16f16_v16i8(<16 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v16f16_v16i8:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v3.4s, v1.8h
-; CHECK-CVT-NEXT:    fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT:    fcvtl2 v4.4s, v0.8h
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    movi v2.4s, #127
-; CHECK-CVT-NEXT:    fcvtzs v3.4s, v3.4s
-; CHECK-CVT-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-CVT-NEXT:    fcvtzs v4.4s, v4.4s
-; CHECK-CVT-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT:    smin v3.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT:    smin v1.4s, v1.4s, v2.4s
-; CHECK-CVT-NEXT:    smin v4.4s, v4.4s, v2.4s
-; CHECK-CVT-NEXT:    smin v0.4s, v0.4s, v2.4s
-; CHECK-CVT-NEXT:    mvni v2.4s, #127
-; CHECK-CVT-NEXT:    smax v3.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT:    smax v1.4s, v1.4s, v2.4s
-; CHECK-CVT-NEXT:    smax v4.4s, v4.4s, v2.4s
-; CHECK-CVT-NEXT:    smax v0.4s, v0.4s, v2.4s
-; CHECK-CVT-NEXT:    uzp1 v1.8h, v1.8h, v3.8h
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v4.8h
-; CHECK-CVT-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_v16f16_v16i8:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs v0.8h, v0.8h
-; CHECK-FP16-NEXT:    fcvtzs v1.8h, v1.8h
-; CHECK-FP16-NEXT:    sqxtn v0.8b, v0.8h
-; CHECK-FP16-NEXT:    sqxtn2 v0.16b, v1.8h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_v16f16_v16i8:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl2 v3.4s, v1.8h
+; CHECK-SD-CVT-NEXT:    fcvtl v1.4s, v1.4h
+; CHECK-SD-CVT-NEXT:    fcvtl2 v4.4s, v0.8h
+; CHECK-SD-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    movi v2.4s, #127
+; CHECK-SD-CVT-NEXT:    fcvtzs v3.4s, v3.4s
+; CHECK-SD-CVT-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    fcvtzs v4.4s, v4.4s
+; CHECK-SD-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-CVT-NEXT:    smin v3.4s, v3.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    smin v1.4s, v1.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    smin v4.4s, v4.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    smin v0.4s, v0.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    mvni v2.4s, #127
+; CHECK-SD-CVT-NEXT:    smax v3.4s, v3.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    smax v1.4s, v1.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    smax v4.4s, v4.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    smax v0.4s, v0.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    uzp1 v1.8h, v1.8h, v3.8h
+; CHECK-SD-CVT-NEXT:    uzp1 v0.8h, v0.8h, v4.8h
+; CHECK-SD-CVT-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_v16f16_v16i8:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    fcvtzs v1.8h, v1.8h
+; CHECK-SD-FP16-NEXT:    sqxtn v0.8b, v0.8h
+; CHECK-SD-FP16-NEXT:    sqxtn2 v0.16b, v1.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v16f16_v16i8:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v3.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-CVT-NEXT:    fcvtl v4.4s, v1.4h
+; CHECK-GI-CVT-NEXT:    fcvtl2 v1.4s, v1.8h
+; CHECK-GI-CVT-NEXT:    movi v2.4s, #127
+; CHECK-GI-CVT-NEXT:    mvni v5.4s, #127
+; CHECK-GI-CVT-NEXT:    fcvtzs v3.4s, v3.4s
+; CHECK-GI-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    fcvtzs v4.4s, v4.4s
+; CHECK-GI-CVT-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    smin v3.4s, v3.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    smin v0.4s, v0.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    smin v4.4s, v4.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    smin v1.4s, v1.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    smax v2.4s, v3.4s, v5.4s
+; CHECK-GI-CVT-NEXT:    smax v0.4s, v0.4s, v5.4s
+; CHECK-GI-CVT-NEXT:    smax v3.4s, v4.4s, v5.4s
+; CHECK-GI-CVT-NEXT:    smax v1.4s, v1.4s, v5.4s
+; CHECK-GI-CVT-NEXT:    uzp1 v0.8h, v2.8h, v0.8h
+; CHECK-GI-CVT-NEXT:    uzp1 v1.8h, v3.8h, v1.8h
+; CHECK-GI-CVT-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v16f16_v16i8:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    movi v2.8h, #127
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    fcvtzs v1.8h, v1.8h
+; CHECK-GI-FP16-NEXT:    mvni v3.8h, #127
+; CHECK-GI-FP16-NEXT:    smin v0.8h, v0.8h, v2.8h
+; CHECK-GI-FP16-NEXT:    smin v1.8h, v1.8h, v2.8h
+; CHECK-GI-FP16-NEXT:    smax v0.8h, v0.8h, v3.8h
+; CHECK-GI-FP16-NEXT:    smax v1.8h, v1.8h, v3.8h
+; CHECK-GI-FP16-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <16 x i8> @llvm.fptosi.sat.v16f16.v16i8(<16 x half> %f)
     ret <16 x i8> %x
 }
 
 define <16 x i16> @test_signed_v16f16_v16i16(<16 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v16f16_v16i16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl v2.4s, v0.4h
-; CHECK-CVT-NEXT:    fcvtl v3.4s, v1.4h
-; CHECK-CVT-NEXT:    fcvtl2 v4.4s, v0.8h
-; CHECK-CVT-NEXT:    fcvtl2 v5.4s, v1.8h
-; CHECK-CVT-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-CVT-NEXT:    fcvtzs v1.4s, v3.4s
-; CHECK-CVT-NEXT:    fcvtzs v3.4s, v5.4s
-; CHECK-CVT-NEXT:    sqxtn v0.4h, v2.4s
-; CHECK-CVT-NEXT:    fcvtzs v2.4s, v4.4s
-; CHECK-CVT-NEXT:    sqxtn v1.4h, v1.4s
-; CHECK-CVT-NEXT:    sqxtn2 v0.8h, v2.4s
-; CHECK-CVT-NEXT:    sqxtn2 v1.8h, v3.4s
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_v16f16_v16i16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs v0.8h, v0.8h
-; CHECK-FP16-NEXT:    fcvtzs v1.8h, v1.8h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_v16f16_v16i16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    fcvtl v3.4s, v1.4h
+; CHECK-SD-CVT-NEXT:    fcvtl2 v4.4s, v0.8h
+; CHECK-SD-CVT-NEXT:    fcvtl2 v5.4s, v1.8h
+; CHECK-SD-CVT-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    fcvtzs v1.4s, v3.4s
+; CHECK-SD-CVT-NEXT:    fcvtzs v3.4s, v5.4s
+; CHECK-SD-CVT-NEXT:    sqxtn v0.4h, v2.4s
+; CHECK-SD-CVT-NEXT:    fcvtzs v2.4s, v4.4s
+; CHECK-SD-CVT-NEXT:    sqxtn v1.4h, v1.4s
+; CHECK-SD-CVT-NEXT:    sqxtn2 v0.8h, v2.4s
+; CHECK-SD-CVT-NEXT:    sqxtn2 v1.8h, v3.4s
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_v16f16_v16i16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    fcvtzs v1.8h, v1.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v16f16_v16i16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v3.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-CVT-NEXT:    fcvtl v4.4s, v1.4h
+; CHECK-GI-CVT-NEXT:    fcvtl2 v1.4s, v1.8h
+; CHECK-GI-CVT-NEXT:    movi v2.4s, #127, msl #8
+; CHECK-GI-CVT-NEXT:    mvni v5.4s, #127, msl #8
+; CHECK-GI-CVT-NEXT:    fcvtzs v3.4s, v3.4s
+; CHECK-GI-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    fcvtzs v4.4s, v4.4s
+; CHECK-GI-CVT-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    smin v3.4s, v3.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    smin v0.4s, v0.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    smin v4.4s, v4.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    smin v1.4s, v1.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    smax v2.4s, v3.4s, v5.4s
+; CHECK-GI-CVT-NEXT:    smax v0.4s, v0.4s, v5.4s
+; CHECK-GI-CVT-NEXT:    smax v3.4s, v4.4s, v5.4s
+; CHECK-GI-CVT-NEXT:    smax v1.4s, v1.4s, v5.4s
+; CHECK-GI-CVT-NEXT:    uzp1 v0.8h, v2.8h, v0.8h
+; CHECK-GI-CVT-NEXT:    uzp1 v1.8h, v3.8h, v1.8h
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v16f16_v16i16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    fcvtzs v1.8h, v1.8h
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <16 x i16> @llvm.fptosi.sat.v16f16.v16i16(<16 x half> %f)
     ret <16 x i16> %x
 }
 
 define <8 x i8> @test_signed_v8f64_v8i8(<8 x double> %f) {
-; CHECK-LABEL: test_signed_v8f64_v8i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d4, v3.d[1]
-; CHECK-NEXT:    fcvtzs w11, d3
-; CHECK-NEXT:    mov w9, #127 // =0x7f
-; CHECK-NEXT:    mov d3, v1.d[1]
-; CHECK-NEXT:    fcvtzs w13, d2
-; CHECK-NEXT:    fcvtzs w15, d1
-; CHECK-NEXT:    fcvtzs w17, d0
-; CHECK-NEXT:    fcvtzs w8, d4
-; CHECK-NEXT:    mov d4, v2.d[1]
-; CHECK-NEXT:    mov d2, v0.d[1]
-; CHECK-NEXT:    fcvtzs w14, d3
-; CHECK-NEXT:    cmp w8, #127
-; CHECK-NEXT:    fcvtzs w12, d4
-; CHECK-NEXT:    fcvtzs w16, d2
-; CHECK-NEXT:    csel w10, w8, w9, lt
-; CHECK-NEXT:    mov w8, #-128 // =0xffffff80
-; CHECK-NEXT:    cmn w10, #128
-; CHECK-NEXT:    csel w10, w10, w8, gt
-; CHECK-NEXT:    cmp w11, #127
-; CHECK-NEXT:    csel w11, w11, w9, lt
-; CHECK-NEXT:    cmn w11, #128
-; CHECK-NEXT:    csel w11, w11, w8, gt
-; CHECK-NEXT:    cmp w12, #127
-; CHECK-NEXT:    csel w12, w12, w9, lt
-; CHECK-NEXT:    fmov s3, w11
-; CHECK-NEXT:    cmn w12, #128
-; CHECK-NEXT:    csel w12, w12, w8, gt
-; CHECK-NEXT:    cmp w13, #127
-; CHECK-NEXT:    csel w13, w13, w9, lt
-; CHECK-NEXT:    mov v3.s[1], w10
-; CHECK-NEXT:    cmn w13, #128
-; CHECK-NEXT:    csel w13, w13, w8, gt
-; CHECK-NEXT:    cmp w14, #127
-; CHECK-NEXT:    csel w14, w14, w9, lt
-; CHECK-NEXT:    fmov s2, w13
-; CHECK-NEXT:    cmn w14, #128
-; CHECK-NEXT:    csel w14, w14, w8, gt
-; CHECK-NEXT:    cmp w15, #127
-; CHECK-NEXT:    csel w15, w15, w9, lt
-; CHECK-NEXT:    mov v2.s[1], w12
-; CHECK-NEXT:    cmn w15, #128
-; CHECK-NEXT:    csel w15, w15, w8, gt
-; CHECK-NEXT:    cmp w16, #127
-; CHECK-NEXT:    csel w11, w16, w9, lt
-; CHECK-NEXT:    fmov s1, w15
-; CHECK-NEXT:    cmn w11, #128
-; CHECK-NEXT:    csel w10, w11, w8, gt
-; CHECK-NEXT:    cmp w17, #127
-; CHECK-NEXT:    csel w9, w17, w9, lt
-; CHECK-NEXT:    mov v1.s[1], w14
-; CHECK-NEXT:    cmn w9, #128
-; CHECK-NEXT:    csel w8, w9, w8, gt
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    adrp x8, .LCPI82_0
-; CHECK-NEXT:    ldr d4, [x8, :lo12:.LCPI82_0]
-; CHECK-NEXT:    mov v0.s[1], w10
-; CHECK-NEXT:    tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v8f64_v8i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d4, v3.d[1]
+; CHECK-SD-NEXT:    fcvtzs w11, d3
+; CHECK-SD-NEXT:    mov w9, #127 // =0x7f
+; CHECK-SD-NEXT:    mov d3, v1.d[1]
+; CHECK-SD-NEXT:    fcvtzs w13, d2
+; CHECK-SD-NEXT:    fcvtzs w15, d1
+; CHECK-SD-NEXT:    fcvtzs w17, d0
+; CHECK-SD-NEXT:    fcvtzs w8, d4
+; CHECK-SD-NEXT:    mov d4, v2.d[1]
+; CHECK-SD-NEXT:    mov d2, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzs w14, d3
+; CHECK-SD-NEXT:    cmp w8, #127
+; CHECK-SD-NEXT:    fcvtzs w12, d4
+; CHECK-SD-NEXT:    fcvtzs w16, d2
+; CHECK-SD-NEXT:    csel w10, w8, w9, lt
+; CHECK-SD-NEXT:    mov w8, #-128 // =0xffffff80
+; CHECK-SD-NEXT:    cmn w10, #128
+; CHECK-SD-NEXT:    csel w10, w10, w8, gt
+; CHECK-SD-NEXT:    cmp w11, #127
+; CHECK-SD-NEXT:    csel w11, w11, w9, lt
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w8, gt
+; CHECK-SD-NEXT:    cmp w12, #127
+; CHECK-SD-NEXT:    csel w12, w12, w9, lt
+; CHECK-SD-NEXT:    fmov s3, w11
+; CHECK-SD-NEXT:    cmn w12, #128
+; CHECK-SD-NEXT:    csel w12, w12, w8, gt
+; CHECK-SD-NEXT:    cmp w13, #127
+; CHECK-SD-NEXT:    csel w13, w13, w9, lt
+; CHECK-SD-NEXT:    mov v3.s[1], w10
+; CHECK-SD-NEXT:    cmn w13, #128
+; CHECK-SD-NEXT:    csel w13, w13, w8, gt
+; CHECK-SD-NEXT:    cmp w14, #127
+; CHECK-SD-NEXT:    csel w14, w14, w9, lt
+; CHECK-SD-NEXT:    fmov s2, w13
+; CHECK-SD-NEXT:    cmn w14, #128
+; CHECK-SD-NEXT:    csel w14, w14, w8, gt
+; CHECK-SD-NEXT:    cmp w15, #127
+; CHECK-SD-NEXT:    csel w15, w15, w9, lt
+; CHECK-SD-NEXT:    mov v2.s[1], w12
+; CHECK-SD-NEXT:    cmn w15, #128
+; CHECK-SD-NEXT:    csel w15, w15, w8, gt
+; CHECK-SD-NEXT:    cmp w16, #127
+; CHECK-SD-NEXT:    csel w11, w16, w9, lt
+; CHECK-SD-NEXT:    fmov s1, w15
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w10, w11, w8, gt
+; CHECK-SD-NEXT:    cmp w17, #127
+; CHECK-SD-NEXT:    csel w9, w17, w9, lt
+; CHECK-SD-NEXT:    mov v1.s[1], w14
+; CHECK-SD-NEXT:    cmn w9, #128
+; CHECK-SD-NEXT:    csel w8, w9, w8, gt
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    adrp x8, .LCPI82_0
+; CHECK-SD-NEXT:    ldr d4, [x8, :lo12:.LCPI82_0]
+; CHECK-SD-NEXT:    mov v0.s[1], w10
+; CHECK-SD-NEXT:    tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v8f64_v8i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    adrp x8, .LCPI82_1
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI82_1]
+; CHECK-GI-NEXT:    adrp x8, .LCPI82_0
+; CHECK-GI-NEXT:    cmgt v5.2d, v4.2d, v0.2d
+; CHECK-GI-NEXT:    cmgt v6.2d, v4.2d, v1.2d
+; CHECK-GI-NEXT:    cmgt v7.2d, v4.2d, v2.2d
+; CHECK-GI-NEXT:    cmgt v16.2d, v4.2d, v3.2d
+; CHECK-GI-NEXT:    bif v0.16b, v4.16b, v5.16b
+; CHECK-GI-NEXT:    bif v1.16b, v4.16b, v6.16b
+; CHECK-GI-NEXT:    bif v2.16b, v4.16b, v7.16b
+; CHECK-GI-NEXT:    bif v3.16b, v4.16b, v16.16b
+; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI82_0]
+; CHECK-GI-NEXT:    cmgt v5.2d, v0.2d, v4.2d
+; CHECK-GI-NEXT:    cmgt v6.2d, v1.2d, v4.2d
+; CHECK-GI-NEXT:    cmgt v7.2d, v2.2d, v4.2d
+; CHECK-GI-NEXT:    cmgt v16.2d, v3.2d, v4.2d
+; CHECK-GI-NEXT:    bif v0.16b, v4.16b, v5.16b
+; CHECK-GI-NEXT:    bif v1.16b, v4.16b, v6.16b
+; CHECK-GI-NEXT:    bif v2.16b, v4.16b, v7.16b
+; CHECK-GI-NEXT:    bif v3.16b, v4.16b, v16.16b
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    xtn v0.8b, v0.8h
+; CHECK-GI-NEXT:    ret
     %x = call <8 x i8> @llvm.fptosi.sat.v8f64.v8i8(<8 x double> %f)
     ret <8 x i8> %x
 }
 
 define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) {
-; CHECK-LABEL: test_signed_v16f64_v16i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d16, v0.d[1]
-; CHECK-NEXT:    fcvtzs w10, d0
-; CHECK-NEXT:    mov w8, #127 // =0x7f
-; CHECK-NEXT:    mov d0, v1.d[1]
-; CHECK-NEXT:    fcvtzs w13, d1
-; CHECK-NEXT:    mov d1, v2.d[1]
-; CHECK-NEXT:    fcvtzs w9, d16
-; CHECK-NEXT:    fcvtzs w12, d0
-; CHECK-NEXT:    cmp w9, #127
-; CHECK-NEXT:    csel w11, w9, w8, lt
-; CHECK-NEXT:    mov w9, #-128 // =0xffffff80
-; CHECK-NEXT:    cmn w11, #128
-; CHECK-NEXT:    csel w11, w11, w9, gt
-; CHECK-NEXT:    cmp w10, #127
-; CHECK-NEXT:    csel w10, w10, w8, lt
-; CHECK-NEXT:    cmn w10, #128
-; CHECK-NEXT:    csel w10, w10, w9, gt
-; CHECK-NEXT:    cmp w12, #127
-; CHECK-NEXT:    fmov s0, w10
-; CHECK-NEXT:    csel w10, w12, w8, lt
-; CHECK-NEXT:    cmn w10, #128
-; CHECK-NEXT:    csel w10, w10, w9, gt
-; CHECK-NEXT:    cmp w13, #127
-; CHECK-NEXT:    csel w12, w13, w8, lt
-; CHECK-NEXT:    mov v0.s[1], w11
-; CHECK-NEXT:    fcvtzs w11, d1
-; CHECK-NEXT:    cmn w12, #128
-; CHECK-NEXT:    csel w12, w12, w9, gt
-; CHECK-NEXT:    fmov s1, w12
-; CHECK-NEXT:    fcvtzs w12, d2
-; CHECK-NEXT:    mov d2, v3.d[1]
-; CHECK-NEXT:    cmp w11, #127
-; CHECK-NEXT:    mov w13, v0.s[1]
-; CHECK-NEXT:    mov v1.s[1], w10
-; CHECK-NEXT:    csel w10, w11, w8, lt
-; CHECK-NEXT:    cmn w10, #128
-; CHECK-NEXT:    fcvtzs w11, d2
-; CHECK-NEXT:    csel w10, w10, w9, gt
-; CHECK-NEXT:    cmp w12, #127
-; CHECK-NEXT:    mov v0.b[1], w13
-; CHECK-NEXT:    csel w12, w12, w8, lt
-; CHECK-NEXT:    cmn w12, #128
-; CHECK-NEXT:    mov w13, v1.s[1]
-; CHECK-NEXT:    csel w12, w12, w9, gt
-; CHECK-NEXT:    cmp w11, #127
-; CHECK-NEXT:    fmov s2, w12
-; CHECK-NEXT:    fcvtzs w12, d3
-; CHECK-NEXT:    mov d3, v4.d[1]
-; CHECK-NEXT:    mov v0.b[2], v1.b[0]
-; CHECK-NEXT:    mov v2.s[1], w10
-; CHECK-NEXT:    csel w10, w11, w8, lt
-; CHECK-NEXT:    cmn w10, #128
-; CHECK-NEXT:    fcvtzs w11, d3
-; CHECK-NEXT:    csel w10, w10, w9, gt
-; CHECK-NEXT:    cmp w12, #127
-; CHECK-NEXT:    mov v0.b[3], w13
-; CHECK-NEXT:    csel w12, w12, w8, lt
-; CHECK-NEXT:    cmn w12, #128
-; CHECK-NEXT:    mov w13, v2.s[1]
-; CHECK-NEXT:    csel w12, w12, w9, gt
-; CHECK-NEXT:    cmp w11, #127
-; CHECK-NEXT:    fmov s3, w12
-; CHECK-NEXT:    fcvtzs w12, d4
-; CHECK-NEXT:    mov v0.b[4], v2.b[0]
-; CHECK-NEXT:    mov d4, v5.d[1]
-; CHECK-NEXT:    mov v3.s[1], w10
-; CHECK-NEXT:    csel w10, w11, w8, lt
-; CHECK-NEXT:    cmn w10, #128
-; CHECK-NEXT:    mov v0.b[5], w13
-; CHECK-NEXT:    csel w10, w10, w9, gt
-; CHECK-NEXT:    cmp w12, #127
-; CHECK-NEXT:    fcvtzs w11, d4
-; CHECK-NEXT:    csel w12, w12, w8, lt
-; CHECK-NEXT:    cmn w12, #128
-; CHECK-NEXT:    mov w13, v3.s[1]
-; CHECK-NEXT:    csel w12, w12, w9, gt
-; CHECK-NEXT:    mov v0.b[6], v3.b[0]
-; CHECK-NEXT:    fmov s4, w12
-; CHECK-NEXT:    fcvtzs w12, d5
-; CHECK-NEXT:    cmp w11, #127
-; CHECK-NEXT:    mov d5, v6.d[1]
-; CHECK-NEXT:    mov v4.s[1], w10
-; CHECK-NEXT:    csel w10, w11, w8, lt
-; CHECK-NEXT:    mov v0.b[7], w13
-; CHECK-NEXT:    cmn w10, #128
-; CHECK-NEXT:    csel w10, w10, w9, gt
-; CHECK-NEXT:    cmp w12, #127
-; CHECK-NEXT:    fcvtzs w13, d5
-; CHECK-NEXT:    csel w11, w12, w8, lt
-; CHECK-NEXT:    cmn w11, #128
-; CHECK-NEXT:    mov w12, v4.s[1]
-; CHECK-NEXT:    mov v0.b[8], v4.b[0]
-; CHECK-NEXT:    csel w11, w11, w9, gt
-; CHECK-NEXT:    fmov s5, w11
-; CHECK-NEXT:    fcvtzs w11, d6
-; CHECK-NEXT:    cmp w13, #127
-; CHECK-NEXT:    mov d6, v7.d[1]
-; CHECK-NEXT:    mov v0.b[9], w12
-; CHECK-NEXT:    mov v5.s[1], w10
-; CHECK-NEXT:    csel w10, w13, w8, lt
-; CHECK-NEXT:    cmn w10, #128
-; CHECK-NEXT:    csel w10, w10, w9, gt
-; CHECK-NEXT:    cmp w11, #127
-; CHECK-NEXT:    fcvtzs w13, d6
-; CHECK-NEXT:    csel w11, w11, w8, lt
-; CHECK-NEXT:    cmn w11, #128
-; CHECK-NEXT:    mov v0.b[10], v5.b[0]
-; CHECK-NEXT:    mov w12, v5.s[1]
-; CHECK-NEXT:    csel w11, w11, w9, gt
-; CHECK-NEXT:    fmov s6, w11
-; CHECK-NEXT:    fcvtzs w11, d7
-; CHECK-NEXT:    cmp w13, #127
-; CHECK-NEXT:    mov v0.b[11], w12
-; CHECK-NEXT:    mov v6.s[1], w10
-; CHECK-NEXT:    csel w10, w13, w8, lt
-; CHECK-NEXT:    cmn w10, #128
-; CHECK-NEXT:    csel w10, w10, w9, gt
-; CHECK-NEXT:    cmp w11, #127
-; CHECK-NEXT:    csel w8, w11, w8, lt
-; CHECK-NEXT:    cmn w8, #128
-; CHECK-NEXT:    mov v0.b[12], v6.b[0]
-; CHECK-NEXT:    mov w11, v6.s[1]
-; CHECK-NEXT:    csel w8, w8, w9, gt
-; CHECK-NEXT:    fmov s7, w8
-; CHECK-NEXT:    mov v0.b[13], w11
-; CHECK-NEXT:    mov v7.s[1], w10
-; CHECK-NEXT:    mov v0.b[14], v7.b[0]
-; CHECK-NEXT:    mov w8, v7.s[1]
-; CHECK-NEXT:    mov v0.b[15], w8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v16f64_v16i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d16, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzs w10, d0
+; CHECK-SD-NEXT:    mov w8, #127 // =0x7f
+; CHECK-SD-NEXT:    mov d0, v1.d[1]
+; CHECK-SD-NEXT:    fcvtzs w13, d1
+; CHECK-SD-NEXT:    mov d1, v2.d[1]
+; CHECK-SD-NEXT:    fcvtzs w9, d16
+; CHECK-SD-NEXT:    fcvtzs w12, d0
+; CHECK-SD-NEXT:    cmp w9, #127
+; CHECK-SD-NEXT:    csel w11, w9, w8, lt
+; CHECK-SD-NEXT:    mov w9, #-128 // =0xffffff80
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
+; CHECK-SD-NEXT:    cmn w10, #128
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    cmp w12, #127
+; CHECK-SD-NEXT:    fmov s0, w10
+; CHECK-SD-NEXT:    csel w10, w12, w8, lt
+; CHECK-SD-NEXT:    cmn w10, #128
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    cmp w13, #127
+; CHECK-SD-NEXT:    csel w12, w13, w8, lt
+; CHECK-SD-NEXT:    mov v0.s[1], w11
+; CHECK-SD-NEXT:    fcvtzs w11, d1
+; CHECK-SD-NEXT:    cmn w12, #128
+; CHECK-SD-NEXT:    csel w12, w12, w9, gt
+; CHECK-SD-NEXT:    fmov s1, w12
+; CHECK-SD-NEXT:    fcvtzs w12, d2
+; CHECK-SD-NEXT:    mov d2, v3.d[1]
+; CHECK-SD-NEXT:    cmp w11, #127
+; CHECK-SD-NEXT:    mov w13, v0.s[1]
+; CHECK-SD-NEXT:    mov v1.s[1], w10
+; CHECK-SD-NEXT:    csel w10, w11, w8, lt
+; CHECK-SD-NEXT:    cmn w10, #128
+; CHECK-SD-NEXT:    fcvtzs w11, d2
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    cmp w12, #127
+; CHECK-SD-NEXT:    mov v0.b[1], w13
+; CHECK-SD-NEXT:    csel w12, w12, w8, lt
+; CHECK-SD-NEXT:    cmn w12, #128
+; CHECK-SD-NEXT:    mov w13, v1.s[1]
+; CHECK-SD-NEXT:    csel w12, w12, w9, gt
+; CHECK-SD-NEXT:    cmp w11, #127
+; CHECK-SD-NEXT:    fmov s2, w12
+; CHECK-SD-NEXT:    fcvtzs w12, d3
+; CHECK-SD-NEXT:    mov d3, v4.d[1]
+; CHECK-SD-NEXT:    mov v0.b[2], v1.b[0]
+; CHECK-SD-NEXT:    mov v2.s[1], w10
+; CHECK-SD-NEXT:    csel w10, w11, w8, lt
+; CHECK-SD-NEXT:    cmn w10, #128
+; CHECK-SD-NEXT:    fcvtzs w11, d3
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    cmp w12, #127
+; CHECK-SD-NEXT:    mov v0.b[3], w13
+; CHECK-SD-NEXT:    csel w12, w12, w8, lt
+; CHECK-SD-NEXT:    cmn w12, #128
+; CHECK-SD-NEXT:    mov w13, v2.s[1]
+; CHECK-SD-NEXT:    csel w12, w12, w9, gt
+; CHECK-SD-NEXT:    cmp w11, #127
+; CHECK-SD-NEXT:    fmov s3, w12
+; CHECK-SD-NEXT:    fcvtzs w12, d4
+; CHECK-SD-NEXT:    mov v0.b[4], v2.b[0]
+; CHECK-SD-NEXT:    mov d4, v5.d[1]
+; CHECK-SD-NEXT:    mov v3.s[1], w10
+; CHECK-SD-NEXT:    csel w10, w11, w8, lt
+; CHECK-SD-NEXT:    cmn w10, #128
+; CHECK-SD-NEXT:    mov v0.b[5], w13
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    cmp w12, #127
+; CHECK-SD-NEXT:    fcvtzs w11, d4
+; CHECK-SD-NEXT:    csel w12, w12, w8, lt
+; CHECK-SD-NEXT:    cmn w12, #128
+; CHECK-SD-NEXT:    mov w13, v3.s[1]
+; CHECK-SD-NEXT:    csel w12, w12, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[6], v3.b[0]
+; CHECK-SD-NEXT:    fmov s4, w12
+; CHECK-SD-NEXT:    fcvtzs w12, d5
+; CHECK-SD-NEXT:    cmp w11, #127
+; CHECK-SD-NEXT:    mov d5, v6.d[1]
+; CHECK-SD-NEXT:    mov v4.s[1], w10
+; CHECK-SD-NEXT:    csel w10, w11, w8, lt
+; CHECK-SD-NEXT:    mov v0.b[7], w13
+; CHECK-SD-NEXT:    cmn w10, #128
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    cmp w12, #127
+; CHECK-SD-NEXT:    fcvtzs w13, d5
+; CHECK-SD-NEXT:    csel w11, w12, w8, lt
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    mov w12, v4.s[1]
+; CHECK-SD-NEXT:    mov v0.b[8], v4.b[0]
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    fmov s5, w11
+; CHECK-SD-NEXT:    fcvtzs w11, d6
+; CHECK-SD-NEXT:    cmp w13, #127
+; CHECK-SD-NEXT:    mov d6, v7.d[1]
+; CHECK-SD-NEXT:    mov v0.b[9], w12
+; CHECK-SD-NEXT:    mov v5.s[1], w10
+; CHECK-SD-NEXT:    csel w10, w13, w8, lt
+; CHECK-SD-NEXT:    cmn w10, #128
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    cmp w11, #127
+; CHECK-SD-NEXT:    fcvtzs w13, d6
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    mov v0.b[10], v5.b[0]
+; CHECK-SD-NEXT:    mov w12, v5.s[1]
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    fmov s6, w11
+; CHECK-SD-NEXT:    fcvtzs w11, d7
+; CHECK-SD-NEXT:    cmp w13, #127
+; CHECK-SD-NEXT:    mov v0.b[11], w12
+; CHECK-SD-NEXT:    mov v6.s[1], w10
+; CHECK-SD-NEXT:    csel w10, w13, w8, lt
+; CHECK-SD-NEXT:    cmn w10, #128
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    cmp w11, #127
+; CHECK-SD-NEXT:    csel w8, w11, w8, lt
+; CHECK-SD-NEXT:    cmn w8, #128
+; CHECK-SD-NEXT:    mov v0.b[12], v6.b[0]
+; CHECK-SD-NEXT:    mov w11, v6.s[1]
+; CHECK-SD-NEXT:    csel w8, w8, w9, gt
+; CHECK-SD-NEXT:    fmov s7, w8
+; CHECK-SD-NEXT:    mov v0.b[13], w11
+; CHECK-SD-NEXT:    mov v7.s[1], w10
+; CHECK-SD-NEXT:    mov v0.b[14], v7.b[0]
+; CHECK-SD-NEXT:    mov w8, v7.s[1]
+; CHECK-SD-NEXT:    mov v0.b[15], w8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v16f64_v16i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    adrp x8, .LCPI83_1
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-GI-NEXT:    ldr q16, [x8, :lo12:.LCPI83_1]
+; CHECK-GI-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-GI-NEXT:    adrp x8, .LCPI83_0
+; CHECK-GI-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-GI-NEXT:    cmgt v17.2d, v16.2d, v0.2d
+; CHECK-GI-NEXT:    cmgt v18.2d, v16.2d, v1.2d
+; CHECK-GI-NEXT:    cmgt v19.2d, v16.2d, v2.2d
+; CHECK-GI-NEXT:    cmgt v20.2d, v16.2d, v3.2d
+; CHECK-GI-NEXT:    cmgt v21.2d, v16.2d, v4.2d
+; CHECK-GI-NEXT:    cmgt v22.2d, v16.2d, v5.2d
+; CHECK-GI-NEXT:    cmgt v23.2d, v16.2d, v6.2d
+; CHECK-GI-NEXT:    cmgt v24.2d, v16.2d, v7.2d
+; CHECK-GI-NEXT:    bif v0.16b, v16.16b, v17.16b
+; CHECK-GI-NEXT:    bif v1.16b, v16.16b, v18.16b
+; CHECK-GI-NEXT:    ldr q17, [x8, :lo12:.LCPI83_0]
+; CHECK-GI-NEXT:    bif v2.16b, v16.16b, v19.16b
+; CHECK-GI-NEXT:    bif v3.16b, v16.16b, v20.16b
+; CHECK-GI-NEXT:    bif v4.16b, v16.16b, v21.16b
+; CHECK-GI-NEXT:    bif v5.16b, v16.16b, v22.16b
+; CHECK-GI-NEXT:    bif v6.16b, v16.16b, v23.16b
+; CHECK-GI-NEXT:    bif v7.16b, v16.16b, v24.16b
+; CHECK-GI-NEXT:    cmgt v16.2d, v0.2d, v17.2d
+; CHECK-GI-NEXT:    cmgt v18.2d, v1.2d, v17.2d
+; CHECK-GI-NEXT:    cmgt v19.2d, v2.2d, v17.2d
+; CHECK-GI-NEXT:    cmgt v20.2d, v3.2d, v17.2d
+; CHECK-GI-NEXT:    cmgt v21.2d, v4.2d, v17.2d
+; CHECK-GI-NEXT:    cmgt v22.2d, v5.2d, v17.2d
+; CHECK-GI-NEXT:    cmgt v23.2d, v6.2d, v17.2d
+; CHECK-GI-NEXT:    cmgt v24.2d, v7.2d, v17.2d
+; CHECK-GI-NEXT:    bif v0.16b, v17.16b, v16.16b
+; CHECK-GI-NEXT:    bif v1.16b, v17.16b, v18.16b
+; CHECK-GI-NEXT:    bif v2.16b, v17.16b, v19.16b
+; CHECK-GI-NEXT:    bif v3.16b, v17.16b, v20.16b
+; CHECK-GI-NEXT:    bif v4.16b, v17.16b, v21.16b
+; CHECK-GI-NEXT:    bif v5.16b, v17.16b, v22.16b
+; CHECK-GI-NEXT:    bif v6.16b, v17.16b, v23.16b
+; CHECK-GI-NEXT:    bif v7.16b, v17.16b, v24.16b
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-GI-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    ret
     %x = call <16 x i8> @llvm.fptosi.sat.v16f64.v16i8(<16 x double> %f)
     ret <16 x i8> %x
 }
 
 define <8 x i16> @test_signed_v8f64_v8i16(<8 x double> %f) {
-; CHECK-LABEL: test_signed_v8f64_v8i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d4, v3.d[1]
-; CHECK-NEXT:    mov w8, #32767 // =0x7fff
-; CHECK-NEXT:    fcvtzs w11, d3
-; CHECK-NEXT:    mov d3, v1.d[1]
-; CHECK-NEXT:    fcvtzs w13, d2
-; CHECK-NEXT:    fcvtzs w15, d1
-; CHECK-NEXT:    fcvtzs w17, d0
-; CHECK-NEXT:    fcvtzs w9, d4
-; CHECK-NEXT:    mov d4, v2.d[1]
-; CHECK-NEXT:    mov d2, v0.d[1]
-; CHECK-NEXT:    fcvtzs w14, d3
-; CHECK-NEXT:    cmp w9, w8
-; CHECK-NEXT:    fcvtzs w12, d4
-; CHECK-NEXT:    fcvtzs w16, d2
-; CHECK-NEXT:    csel w10, w9, w8, lt
-; CHECK-NEXT:    mov w9, #-32768 // =0xffff8000
-; CHECK-NEXT:    cmn w10, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w10, w10, w9, gt
-; CHECK-NEXT:    cmp w11, w8
-; CHECK-NEXT:    csel w11, w11, w8, lt
-; CHECK-NEXT:    cmn w11, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w11, w11, w9, gt
-; CHECK-NEXT:    cmp w12, w8
-; CHECK-NEXT:    csel w12, w12, w8, lt
-; CHECK-NEXT:    fmov s3, w11
-; CHECK-NEXT:    cmn w12, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w12, w12, w9, gt
-; CHECK-NEXT:    cmp w13, w8
-; CHECK-NEXT:    csel w13, w13, w8, lt
-; CHECK-NEXT:    mov v3.s[1], w10
-; CHECK-NEXT:    cmn w13, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w13, w13, w9, gt
-; CHECK-NEXT:    cmp w14, w8
-; CHECK-NEXT:    csel w14, w14, w8, lt
-; CHECK-NEXT:    fmov s2, w13
-; CHECK-NEXT:    cmn w14, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w14, w14, w9, gt
-; CHECK-NEXT:    cmp w15, w8
-; CHECK-NEXT:    csel w15, w15, w8, lt
-; CHECK-NEXT:    mov v2.s[1], w12
-; CHECK-NEXT:    cmn w15, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w15, w15, w9, gt
-; CHECK-NEXT:    cmp w16, w8
-; CHECK-NEXT:    csel w11, w16, w8, lt
-; CHECK-NEXT:    fmov s1, w15
-; CHECK-NEXT:    cmn w11, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w10, w11, w9, gt
-; CHECK-NEXT:    cmp w17, w8
-; CHECK-NEXT:    csel w8, w17, w8, lt
-; CHECK-NEXT:    mov v1.s[1], w14
-; CHECK-NEXT:    cmn w8, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w8, w8, w9, gt
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    adrp x8, .LCPI84_0
-; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI84_0]
-; CHECK-NEXT:    mov v0.s[1], w10
-; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v8f64_v8i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d4, v3.d[1]
+; CHECK-SD-NEXT:    mov w8, #32767 // =0x7fff
+; CHECK-SD-NEXT:    fcvtzs w11, d3
+; CHECK-SD-NEXT:    mov d3, v1.d[1]
+; CHECK-SD-NEXT:    fcvtzs w13, d2
+; CHECK-SD-NEXT:    fcvtzs w15, d1
+; CHECK-SD-NEXT:    fcvtzs w17, d0
+; CHECK-SD-NEXT:    fcvtzs w9, d4
+; CHECK-SD-NEXT:    mov d4, v2.d[1]
+; CHECK-SD-NEXT:    mov d2, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzs w14, d3
+; CHECK-SD-NEXT:    cmp w9, w8
+; CHECK-SD-NEXT:    fcvtzs w12, d4
+; CHECK-SD-NEXT:    fcvtzs w16, d2
+; CHECK-SD-NEXT:    csel w10, w9, w8, lt
+; CHECK-SD-NEXT:    mov w9, #-32768 // =0xffff8000
+; CHECK-SD-NEXT:    cmn w10, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    cmp w11, w8
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    cmn w11, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    cmp w12, w8
+; CHECK-SD-NEXT:    csel w12, w12, w8, lt
+; CHECK-SD-NEXT:    fmov s3, w11
+; CHECK-SD-NEXT:    cmn w12, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w12, w12, w9, gt
+; CHECK-SD-NEXT:    cmp w13, w8
+; CHECK-SD-NEXT:    csel w13, w13, w8, lt
+; CHECK-SD-NEXT:    mov v3.s[1], w10
+; CHECK-SD-NEXT:    cmn w13, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w13, w13, w9, gt
+; CHECK-SD-NEXT:    cmp w14, w8
+; CHECK-SD-NEXT:    csel w14, w14, w8, lt
+; CHECK-SD-NEXT:    fmov s2, w13
+; CHECK-SD-NEXT:    cmn w14, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w14, w14, w9, gt
+; CHECK-SD-NEXT:    cmp w15, w8
+; CHECK-SD-NEXT:    csel w15, w15, w8, lt
+; CHECK-SD-NEXT:    mov v2.s[1], w12
+; CHECK-SD-NEXT:    cmn w15, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w15, w15, w9, gt
+; CHECK-SD-NEXT:    cmp w16, w8
+; CHECK-SD-NEXT:    csel w11, w16, w8, lt
+; CHECK-SD-NEXT:    fmov s1, w15
+; CHECK-SD-NEXT:    cmn w11, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w10, w11, w9, gt
+; CHECK-SD-NEXT:    cmp w17, w8
+; CHECK-SD-NEXT:    csel w8, w17, w8, lt
+; CHECK-SD-NEXT:    mov v1.s[1], w14
+; CHECK-SD-NEXT:    cmn w8, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w8, w8, w9, gt
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    adrp x8, .LCPI84_0
+; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI84_0]
+; CHECK-SD-NEXT:    mov v0.s[1], w10
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v8f64_v8i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    adrp x8, .LCPI84_1
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI84_1]
+; CHECK-GI-NEXT:    adrp x8, .LCPI84_0
+; CHECK-GI-NEXT:    cmgt v5.2d, v4.2d, v0.2d
+; CHECK-GI-NEXT:    cmgt v6.2d, v4.2d, v1.2d
+; CHECK-GI-NEXT:    cmgt v7.2d, v4.2d, v2.2d
+; CHECK-GI-NEXT:    cmgt v16.2d, v4.2d, v3.2d
+; CHECK-GI-NEXT:    bif v0.16b, v4.16b, v5.16b
+; CHECK-GI-NEXT:    bif v1.16b, v4.16b, v6.16b
+; CHECK-GI-NEXT:    bif v2.16b, v4.16b, v7.16b
+; CHECK-GI-NEXT:    bif v3.16b, v4.16b, v16.16b
+; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI84_0]
+; CHECK-GI-NEXT:    cmgt v5.2d, v0.2d, v4.2d
+; CHECK-GI-NEXT:    cmgt v6.2d, v1.2d, v4.2d
+; CHECK-GI-NEXT:    cmgt v7.2d, v2.2d, v4.2d
+; CHECK-GI-NEXT:    cmgt v16.2d, v3.2d, v4.2d
+; CHECK-GI-NEXT:    bif v0.16b, v4.16b, v5.16b
+; CHECK-GI-NEXT:    bif v1.16b, v4.16b, v6.16b
+; CHECK-GI-NEXT:    bif v2.16b, v4.16b, v7.16b
+; CHECK-GI-NEXT:    bif v3.16b, v4.16b, v16.16b
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
     %x = call <8 x i16> @llvm.fptosi.sat.v8f64.v8i16(<8 x double> %f)
     ret <8 x i16> %x
 }
 
 define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) {
-; CHECK-LABEL: test_signed_v16f64_v16i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d16, v3.d[1]
-; CHECK-NEXT:    mov w9, #32767 // =0x7fff
-; CHECK-NEXT:    fcvtzs w11, d3
-; CHECK-NEXT:    mov d3, v1.d[1]
-; CHECK-NEXT:    fcvtzs w14, d2
-; CHECK-NEXT:    fcvtzs w15, d1
-; CHECK-NEXT:    mov d1, v7.d[1]
-; CHECK-NEXT:    fcvtzs w18, d0
-; CHECK-NEXT:    fcvtzs w1, d7
-; CHECK-NEXT:    fcvtzs w2, d6
-; CHECK-NEXT:    fcvtzs w4, d5
-; CHECK-NEXT:    fcvtzs w6, d4
-; CHECK-NEXT:    fcvtzs w8, d16
-; CHECK-NEXT:    mov d16, v2.d[1]
-; CHECK-NEXT:    mov d2, v0.d[1]
-; CHECK-NEXT:    mov d0, v6.d[1]
-; CHECK-NEXT:    fcvtzs w0, d1
-; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    fcvtzs w13, d16
-; CHECK-NEXT:    fcvtzs w17, d2
-; CHECK-NEXT:    csel w10, w8, w9, lt
-; CHECK-NEXT:    mov w8, #-32768 // =0xffff8000
-; CHECK-NEXT:    cmn w10, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w10, w10, w8, gt
-; CHECK-NEXT:    cmp w11, w9
-; CHECK-NEXT:    csel w11, w11, w9, lt
-; CHECK-NEXT:    cmn w11, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w12, w11, w8, gt
-; CHECK-NEXT:    cmp w13, w9
-; CHECK-NEXT:    csel w11, w13, w9, lt
-; CHECK-NEXT:    fcvtzs w13, d3
-; CHECK-NEXT:    cmn w11, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w11, w11, w8, gt
-; CHECK-NEXT:    cmp w14, w9
-; CHECK-NEXT:    csel w14, w14, w9, lt
-; CHECK-NEXT:    cmn w14, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w14, w14, w8, gt
-; CHECK-NEXT:    cmp w13, w9
-; CHECK-NEXT:    csel w13, w13, w9, lt
-; CHECK-NEXT:    cmn w13, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w13, w13, w8, gt
-; CHECK-NEXT:    cmp w15, w9
-; CHECK-NEXT:    csel w15, w15, w9, lt
-; CHECK-NEXT:    cmn w15, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w16, w15, w8, gt
-; CHECK-NEXT:    cmp w17, w9
-; CHECK-NEXT:    csel w15, w17, w9, lt
-; CHECK-NEXT:    cmn w15, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w15, w15, w8, gt
-; CHECK-NEXT:    cmp w18, w9
-; CHECK-NEXT:    csel w17, w18, w9, lt
-; CHECK-NEXT:    cmn w17, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w17, w17, w8, gt
-; CHECK-NEXT:    cmp w0, w9
-; CHECK-NEXT:    csel w18, w0, w9, lt
-; CHECK-NEXT:    fcvtzs w0, d0
-; CHECK-NEXT:    mov d0, v5.d[1]
-; CHECK-NEXT:    cmn w18, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w18, w18, w8, gt
-; CHECK-NEXT:    cmp w1, w9
-; CHECK-NEXT:    csel w1, w1, w9, lt
-; CHECK-NEXT:    cmn w1, #8, lsl #12 // =32768
-; CHECK-NEXT:    fcvtzs w3, d0
-; CHECK-NEXT:    mov d0, v4.d[1]
-; CHECK-NEXT:    csel w1, w1, w8, gt
-; CHECK-NEXT:    cmp w0, w9
-; CHECK-NEXT:    csel w0, w0, w9, lt
-; CHECK-NEXT:    fmov s7, w1
-; CHECK-NEXT:    cmn w0, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w0, w0, w8, gt
-; CHECK-NEXT:    cmp w2, w9
-; CHECK-NEXT:    fcvtzs w5, d0
-; CHECK-NEXT:    csel w2, w2, w9, lt
-; CHECK-NEXT:    fmov s3, w12
-; CHECK-NEXT:    mov v7.s[1], w18
-; CHECK-NEXT:    cmn w2, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w2, w2, w8, gt
-; CHECK-NEXT:    cmp w3, w9
-; CHECK-NEXT:    csel w3, w3, w9, lt
-; CHECK-NEXT:    mov v3.s[1], w10
-; CHECK-NEXT:    fmov s6, w2
-; CHECK-NEXT:    cmn w3, #8, lsl #12 // =32768
-; CHECK-NEXT:    fmov s2, w14
-; CHECK-NEXT:    csel w3, w3, w8, gt
-; CHECK-NEXT:    cmp w4, w9
-; CHECK-NEXT:    csel w4, w4, w9, lt
-; CHECK-NEXT:    mov v6.s[1], w0
-; CHECK-NEXT:    cmn w4, #8, lsl #12 // =32768
-; CHECK-NEXT:    mov v2.s[1], w11
-; CHECK-NEXT:    csel w12, w4, w8, gt
-; CHECK-NEXT:    cmp w5, w9
-; CHECK-NEXT:    fmov s1, w16
-; CHECK-NEXT:    csel w10, w5, w9, lt
-; CHECK-NEXT:    fmov s5, w12
-; CHECK-NEXT:    cmn w10, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w10, w10, w8, gt
-; CHECK-NEXT:    cmp w6, w9
-; CHECK-NEXT:    mov v1.s[1], w13
-; CHECK-NEXT:    csel w9, w6, w9, lt
-; CHECK-NEXT:    mov v5.s[1], w3
-; CHECK-NEXT:    fmov s0, w17
-; CHECK-NEXT:    cmn w9, #8, lsl #12 // =32768
-; CHECK-NEXT:    csel w8, w9, w8, gt
-; CHECK-NEXT:    fmov s4, w8
-; CHECK-NEXT:    mov v0.s[1], w15
-; CHECK-NEXT:    adrp x8, .LCPI85_0
-; CHECK-NEXT:    ldr q16, [x8, :lo12:.LCPI85_0]
-; CHECK-NEXT:    mov v4.s[1], w10
-; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
-; CHECK-NEXT:    tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v16f64_v16i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d16, v3.d[1]
+; CHECK-SD-NEXT:    mov w9, #32767 // =0x7fff
+; CHECK-SD-NEXT:    fcvtzs w11, d3
+; CHECK-SD-NEXT:    mov d3, v1.d[1]
+; CHECK-SD-NEXT:    fcvtzs w14, d2
+; CHECK-SD-NEXT:    fcvtzs w15, d1
+; CHECK-SD-NEXT:    mov d1, v7.d[1]
+; CHECK-SD-NEXT:    fcvtzs w18, d0
+; CHECK-SD-NEXT:    fcvtzs w1, d7
+; CHECK-SD-NEXT:    fcvtzs w2, d6
+; CHECK-SD-NEXT:    fcvtzs w4, d5
+; CHECK-SD-NEXT:    fcvtzs w6, d4
+; CHECK-SD-NEXT:    fcvtzs w8, d16
+; CHECK-SD-NEXT:    mov d16, v2.d[1]
+; CHECK-SD-NEXT:    mov d2, v0.d[1]
+; CHECK-SD-NEXT:    mov d0, v6.d[1]
+; CHECK-SD-NEXT:    fcvtzs w0, d1
+; CHECK-SD-NEXT:    cmp w8, w9
+; CHECK-SD-NEXT:    fcvtzs w13, d16
+; CHECK-SD-NEXT:    fcvtzs w17, d2
+; CHECK-SD-NEXT:    csel w10, w8, w9, lt
+; CHECK-SD-NEXT:    mov w8, #-32768 // =0xffff8000
+; CHECK-SD-NEXT:    cmn w10, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w10, w10, w8, gt
+; CHECK-SD-NEXT:    cmp w11, w9
+; CHECK-SD-NEXT:    csel w11, w11, w9, lt
+; CHECK-SD-NEXT:    cmn w11, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w12, w11, w8, gt
+; CHECK-SD-NEXT:    cmp w13, w9
+; CHECK-SD-NEXT:    csel w11, w13, w9, lt
+; CHECK-SD-NEXT:    fcvtzs w13, d3
+; CHECK-SD-NEXT:    cmn w11, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w11, w11, w8, gt
+; CHECK-SD-NEXT:    cmp w14, w9
+; CHECK-SD-NEXT:    csel w14, w14, w9, lt
+; CHECK-SD-NEXT:    cmn w14, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w14, w14, w8, gt
+; CHECK-SD-NEXT:    cmp w13, w9
+; CHECK-SD-NEXT:    csel w13, w13, w9, lt
+; CHECK-SD-NEXT:    cmn w13, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w13, w13, w8, gt
+; CHECK-SD-NEXT:    cmp w15, w9
+; CHECK-SD-NEXT:    csel w15, w15, w9, lt
+; CHECK-SD-NEXT:    cmn w15, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w16, w15, w8, gt
+; CHECK-SD-NEXT:    cmp w17, w9
+; CHECK-SD-NEXT:    csel w15, w17, w9, lt
+; CHECK-SD-NEXT:    cmn w15, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w15, w15, w8, gt
+; CHECK-SD-NEXT:    cmp w18, w9
+; CHECK-SD-NEXT:    csel w17, w18, w9, lt
+; CHECK-SD-NEXT:    cmn w17, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w17, w17, w8, gt
+; CHECK-SD-NEXT:    cmp w0, w9
+; CHECK-SD-NEXT:    csel w18, w0, w9, lt
+; CHECK-SD-NEXT:    fcvtzs w0, d0
+; CHECK-SD-NEXT:    mov d0, v5.d[1]
+; CHECK-SD-NEXT:    cmn w18, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w18, w18, w8, gt
+; CHECK-SD-NEXT:    cmp w1, w9
+; CHECK-SD-NEXT:    csel w1, w1, w9, lt
+; CHECK-SD-NEXT:    cmn w1, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    fcvtzs w3, d0
+; CHECK-SD-NEXT:    mov d0, v4.d[1]
+; CHECK-SD-NEXT:    csel w1, w1, w8, gt
+; CHECK-SD-NEXT:    cmp w0, w9
+; CHECK-SD-NEXT:    csel w0, w0, w9, lt
+; CHECK-SD-NEXT:    fmov s7, w1
+; CHECK-SD-NEXT:    cmn w0, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w0, w0, w8, gt
+; CHECK-SD-NEXT:    cmp w2, w9
+; CHECK-SD-NEXT:    fcvtzs w5, d0
+; CHECK-SD-NEXT:    csel w2, w2, w9, lt
+; CHECK-SD-NEXT:    fmov s3, w12
+; CHECK-SD-NEXT:    mov v7.s[1], w18
+; CHECK-SD-NEXT:    cmn w2, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w2, w2, w8, gt
+; CHECK-SD-NEXT:    cmp w3, w9
+; CHECK-SD-NEXT:    csel w3, w3, w9, lt
+; CHECK-SD-NEXT:    mov v3.s[1], w10
+; CHECK-SD-NEXT:    fmov s6, w2
+; CHECK-SD-NEXT:    cmn w3, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    fmov s2, w14
+; CHECK-SD-NEXT:    csel w3, w3, w8, gt
+; CHECK-SD-NEXT:    cmp w4, w9
+; CHECK-SD-NEXT:    csel w4, w4, w9, lt
+; CHECK-SD-NEXT:    mov v6.s[1], w0
+; CHECK-SD-NEXT:    cmn w4, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    mov v2.s[1], w11
+; CHECK-SD-NEXT:    csel w12, w4, w8, gt
+; CHECK-SD-NEXT:    cmp w5, w9
+; CHECK-SD-NEXT:    fmov s1, w16
+; CHECK-SD-NEXT:    csel w10, w5, w9, lt
+; CHECK-SD-NEXT:    fmov s5, w12
+; CHECK-SD-NEXT:    cmn w10, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w10, w10, w8, gt
+; CHECK-SD-NEXT:    cmp w6, w9
+; CHECK-SD-NEXT:    mov v1.s[1], w13
+; CHECK-SD-NEXT:    csel w9, w6, w9, lt
+; CHECK-SD-NEXT:    mov v5.s[1], w3
+; CHECK-SD-NEXT:    fmov s0, w17
+; CHECK-SD-NEXT:    cmn w9, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w8, w9, w8, gt
+; CHECK-SD-NEXT:    fmov s4, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w15
+; CHECK-SD-NEXT:    adrp x8, .LCPI85_0
+; CHECK-SD-NEXT:    ldr q16, [x8, :lo12:.LCPI85_0]
+; CHECK-SD-NEXT:    mov v4.s[1], w10
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
+; CHECK-SD-NEXT:    tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_v16f64_v16i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    adrp x8, .LCPI85_1
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-GI-NEXT:    ldr q16, [x8, :lo12:.LCPI85_1]
+; CHECK-GI-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-GI-NEXT:    adrp x8, .LCPI85_0
+; CHECK-GI-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-GI-NEXT:    cmgt v17.2d, v16.2d, v0.2d
+; CHECK-GI-NEXT:    cmgt v18.2d, v16.2d, v1.2d
+; CHECK-GI-NEXT:    cmgt v19.2d, v16.2d, v2.2d
+; CHECK-GI-NEXT:    cmgt v20.2d, v16.2d, v3.2d
+; CHECK-GI-NEXT:    cmgt v21.2d, v16.2d, v4.2d
+; CHECK-GI-NEXT:    cmgt v22.2d, v16.2d, v5.2d
+; CHECK-GI-NEXT:    cmgt v23.2d, v16.2d, v6.2d
+; CHECK-GI-NEXT:    cmgt v24.2d, v16.2d, v7.2d
+; CHECK-GI-NEXT:    bif v0.16b, v16.16b, v17.16b
+; CHECK-GI-NEXT:    bif v1.16b, v16.16b, v18.16b
+; CHECK-GI-NEXT:    ldr q17, [x8, :lo12:.LCPI85_0]
+; CHECK-GI-NEXT:    bif v2.16b, v16.16b, v19.16b
+; CHECK-GI-NEXT:    bif v3.16b, v16.16b, v20.16b
+; CHECK-GI-NEXT:    bif v4.16b, v16.16b, v21.16b
+; CHECK-GI-NEXT:    bif v5.16b, v16.16b, v22.16b
+; CHECK-GI-NEXT:    bif v6.16b, v16.16b, v23.16b
+; CHECK-GI-NEXT:    bif v7.16b, v16.16b, v24.16b
+; CHECK-GI-NEXT:    cmgt v16.2d, v0.2d, v17.2d
+; CHECK-GI-NEXT:    cmgt v18.2d, v1.2d, v17.2d
+; CHECK-GI-NEXT:    cmgt v19.2d, v2.2d, v17.2d
+; CHECK-GI-NEXT:    cmgt v20.2d, v3.2d, v17.2d
+; CHECK-GI-NEXT:    cmgt v21.2d, v4.2d, v17.2d
+; CHECK-GI-NEXT:    cmgt v22.2d, v5.2d, v17.2d
+; CHECK-GI-NEXT:    cmgt v23.2d, v6.2d, v17.2d
+; CHECK-GI-NEXT:    cmgt v24.2d, v7.2d, v17.2d
+; CHECK-GI-NEXT:    bif v0.16b, v17.16b, v16.16b
+; CHECK-GI-NEXT:    bif v1.16b, v17.16b, v18.16b
+; CHECK-GI-NEXT:    bif v2.16b, v17.16b, v19.16b
+; CHECK-GI-NEXT:    bif v3.16b, v17.16b, v20.16b
+; CHECK-GI-NEXT:    bif v4.16b, v17.16b, v21.16b
+; CHECK-GI-NEXT:    bif v5.16b, v17.16b, v22.16b
+; CHECK-GI-NEXT:    bif v6.16b, v17.16b, v23.16b
+; CHECK-GI-NEXT:    bif v7.16b, v17.16b, v24.16b
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-GI-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    ret
     %x = call <16 x i16> @llvm.fptosi.sat.v16f64.v16i16(<16 x double> %f)
     ret <16 x i16> %x
 }
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
index 1e1e7327f71fdc..60f961fa8f9443 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
+; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
 
 ;
 ; 32-bit float to unsigned integer
@@ -18,12 +20,20 @@ declare i100 @llvm.fptoui.sat.i100.f32(float)
 declare i128 @llvm.fptoui.sat.i128.f32(float)
 
 define i1 @test_unsigned_i1_f32(float %f) nounwind {
-; CHECK-LABEL: test_unsigned_i1_f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu w8, s0
-; CHECK-NEXT:    cmp w8, #1
-; CHECK-NEXT:    csinc w0, w8, wzr, lo
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_i1_f32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu w8, s0
+; CHECK-SD-NEXT:    cmp w8, #1
+; CHECK-SD-NEXT:    csinc w0, w8, wzr, lo
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_i1_f32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzu w8, s0
+; CHECK-GI-NEXT:    cmp w8, #1
+; CHECK-GI-NEXT:    csinc w8, w8, wzr, lo
+; CHECK-GI-NEXT:    and w0, w8, #0x1
+; CHECK-GI-NEXT:    ret
     %x = call i1 @llvm.fptoui.sat.i1.f32(float %f)
     ret i1 %x
 }
@@ -107,46 +117,83 @@ define i64 @test_unsigned_i64_f32(float %f) nounwind {
 }
 
 define i100 @test_unsigned_i100_f32(float %f) nounwind {
-; CHECK-LABEL: test_unsigned_i100_f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov s8, s0
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    mov w8, #1904214015 // =0x717fffff
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov x10, #68719476735 // =0xfffffffff
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    csel x1, x10, x9, gt
-; CHECK-NEXT:    csinv x0, x8, xzr, le
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_i100_f32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s8, s0
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    mov w8, #1904214015 // =0x717fffff
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov x10, #68719476735 // =0xfffffffff
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    csel x1, x10, x9, gt
+; CHECK-SD-NEXT:    csinv x0, x8, xzr, le
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_i100_f32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s8, s0
+; CHECK-GI-NEXT:    bl __fixunssfti
+; CHECK-GI-NEXT:    mov w8, #1904214015 // =0x717fffff
+; CHECK-GI-NEXT:    fcmp s8, #0.0
+; CHECK-GI-NEXT:    mov x10, #68719476735 // =0xfffffffff
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp s8, s0
+; CHECK-GI-NEXT:    csinv x0, x8, xzr, le
+; CHECK-GI-NEXT:    csel x1, x10, x9, gt
+; CHECK-GI-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
     %x = call i100 @llvm.fptoui.sat.i100.f32(float %f)
     ret i100 %x
 }
 
 define i128 @test_unsigned_i128_f32(float %f) nounwind {
-; CHECK-LABEL: test_unsigned_i128_f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov s8, s0
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    mov w8, #2139095039 // =0x7f7fffff
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    csinv x0, x9, xzr, le
-; CHECK-NEXT:    csinv x1, x8, xzr, le
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_i128_f32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s8, s0
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    mov w8, #2139095039 // =0x7f7fffff
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    csinv x0, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x1, x8, xzr, le
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_i128_f32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s8, s0
+; CHECK-GI-NEXT:    bl __fixunssfti
+; CHECK-GI-NEXT:    mov w8, #2139095039 // =0x7f7fffff
+; CHECK-GI-NEXT:    fcmp s8, #0.0
+; CHECK-GI-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp s8, s0
+; CHECK-GI-NEXT:    csinv x0, x8, xzr, le
+; CHECK-GI-NEXT:    csinv x1, x9, xzr, le
+; CHECK-GI-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
     %x = call i128 @llvm.fptoui.sat.i128.f32(float %f)
     ret i128 %x
 }
@@ -167,12 +214,20 @@ declare i100 @llvm.fptoui.sat.i100.f64(double)
 declare i128 @llvm.fptoui.sat.i128.f64(double)
 
 define i1 @test_unsigned_i1_f64(double %f) nounwind {
-; CHECK-LABEL: test_unsigned_i1_f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu w8, d0
-; CHECK-NEXT:    cmp w8, #1
-; CHECK-NEXT:    csinc w0, w8, wzr, lo
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_i1_f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu w8, d0
+; CHECK-SD-NEXT:    cmp w8, #1
+; CHECK-SD-NEXT:    csinc w0, w8, wzr, lo
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_i1_f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzu w8, d0
+; CHECK-GI-NEXT:    cmp w8, #1
+; CHECK-GI-NEXT:    csinc w8, w8, wzr, lo
+; CHECK-GI-NEXT:    and w0, w8, #0x1
+; CHECK-GI-NEXT:    ret
     %x = call i1 @llvm.fptoui.sat.i1.f64(double %f)
     ret i1 %x
 }
@@ -256,46 +311,83 @@ define i64 @test_unsigned_i64_f64(double %f) nounwind {
 }
 
 define i100 @test_unsigned_i100_f64(double %f) nounwind {
-; CHECK-LABEL: test_unsigned_i100_f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov d8, d0
-; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    mov x8, #5057542381537067007 // =0x462fffffffffffff
-; CHECK-NEXT:    fcmp d8, #0.0
-; CHECK-NEXT:    mov x10, #68719476735 // =0xfffffffff
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp d8, d0
-; CHECK-NEXT:    csel x1, x10, x9, gt
-; CHECK-NEXT:    csinv x0, x8, xzr, le
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_i100_f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov d8, d0
+; CHECK-SD-NEXT:    bl __fixunsdfti
+; CHECK-SD-NEXT:    mov x8, #5057542381537067007 // =0x462fffffffffffff
+; CHECK-SD-NEXT:    fcmp d8, #0.0
+; CHECK-SD-NEXT:    mov x10, #68719476735 // =0xfffffffff
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp d8, d0
+; CHECK-SD-NEXT:    csel x1, x10, x9, gt
+; CHECK-SD-NEXT:    csinv x0, x8, xzr, le
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_i100_f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    fmov d8, d0
+; CHECK-GI-NEXT:    bl __fixunsdfti
+; CHECK-GI-NEXT:    mov x8, #5057542381537067007 // =0x462fffffffffffff
+; CHECK-GI-NEXT:    fcmp d8, #0.0
+; CHECK-GI-NEXT:    mov x10, #68719476735 // =0xfffffffff
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp d8, d0
+; CHECK-GI-NEXT:    csinv x0, x8, xzr, le
+; CHECK-GI-NEXT:    csel x1, x10, x9, gt
+; CHECK-GI-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
     %x = call i100 @llvm.fptoui.sat.i100.f64(double %f)
     ret i100 %x
 }
 
 define i128 @test_unsigned_i128_f64(double %f) nounwind {
-; CHECK-LABEL: test_unsigned_i128_f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov d8, d0
-; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    mov x8, #5183643171103440895 // =0x47efffffffffffff
-; CHECK-NEXT:    fcmp d8, #0.0
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp d8, d0
-; CHECK-NEXT:    csinv x0, x9, xzr, le
-; CHECK-NEXT:    csinv x1, x8, xzr, le
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_i128_f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov d8, d0
+; CHECK-SD-NEXT:    bl __fixunsdfti
+; CHECK-SD-NEXT:    mov x8, #5183643171103440895 // =0x47efffffffffffff
+; CHECK-SD-NEXT:    fcmp d8, #0.0
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp d8, d0
+; CHECK-SD-NEXT:    csinv x0, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x1, x8, xzr, le
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_i128_f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    fmov d8, d0
+; CHECK-GI-NEXT:    bl __fixunsdfti
+; CHECK-GI-NEXT:    mov x8, #5183643171103440895 // =0x47efffffffffffff
+; CHECK-GI-NEXT:    fcmp d8, #0.0
+; CHECK-GI-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp d8, d0
+; CHECK-GI-NEXT:    csinv x0, x8, xzr, le
+; CHECK-GI-NEXT:    csinv x1, x9, xzr, le
+; CHECK-GI-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
     %x = call i128 @llvm.fptoui.sat.i128.f64(double %f)
     ret i128 %x
 }
@@ -316,202 +408,423 @@ declare i100 @llvm.fptoui.sat.i100.f16(half)
 declare i128 @llvm.fptoui.sat.i128.f16(half)
 
 define i1 @test_unsigned_i1_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i1_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzu w8, s0
-; CHECK-CVT-NEXT:    cmp w8, #1
-; CHECK-CVT-NEXT:    csinc w0, w8, wzr, lo
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i1_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu w8, h0
-; CHECK-FP16-NEXT:    cmp w8, #1
-; CHECK-FP16-NEXT:    csinc w0, w8, wzr, lo
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i1_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-SD-CVT-NEXT:    cmp w8, #1
+; CHECK-SD-CVT-NEXT:    csinc w0, w8, wzr, lo
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i1_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu w8, h0
+; CHECK-SD-FP16-NEXT:    cmp w8, #1
+; CHECK-SD-FP16-NEXT:    csinc w0, w8, wzr, lo
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i1_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-GI-CVT-NEXT:    cmp w8, #1
+; CHECK-GI-CVT-NEXT:    csinc w8, w8, wzr, lo
+; CHECK-GI-CVT-NEXT:    and w0, w8, #0x1
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i1_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu w8, h0
+; CHECK-GI-FP16-NEXT:    cmp w8, #1
+; CHECK-GI-FP16-NEXT:    csinc w8, w8, wzr, lo
+; CHECK-GI-FP16-NEXT:    and w0, w8, #0x1
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i1 @llvm.fptoui.sat.i1.f16(half %f)
     ret i1 %x
 }
 
 define i8 @test_unsigned_i8_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i8_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov w8, #255 // =0xff
-; CHECK-CVT-NEXT:    fcvtzu w9, s0
-; CHECK-CVT-NEXT:    cmp w9, #255
-; CHECK-CVT-NEXT:    csel w0, w9, w8, lo
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i8_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu w9, h0
-; CHECK-FP16-NEXT:    mov w8, #255 // =0xff
-; CHECK-FP16-NEXT:    cmp w9, #255
-; CHECK-FP16-NEXT:    csel w0, w9, w8, lo
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i8_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov w8, #255 // =0xff
+; CHECK-SD-CVT-NEXT:    fcvtzu w9, s0
+; CHECK-SD-CVT-NEXT:    cmp w9, #255
+; CHECK-SD-CVT-NEXT:    csel w0, w9, w8, lo
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i8_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu w9, h0
+; CHECK-SD-FP16-NEXT:    mov w8, #255 // =0xff
+; CHECK-SD-FP16-NEXT:    cmp w9, #255
+; CHECK-SD-FP16-NEXT:    csel w0, w9, w8, lo
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i8_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov w8, #255 // =0xff
+; CHECK-GI-CVT-NEXT:    fcvtzu w9, s0
+; CHECK-GI-CVT-NEXT:    cmp w9, #255
+; CHECK-GI-CVT-NEXT:    csel w0, w9, w8, lo
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i8_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu w9, h0
+; CHECK-GI-FP16-NEXT:    mov w8, #255 // =0xff
+; CHECK-GI-FP16-NEXT:    cmp w9, #255
+; CHECK-GI-FP16-NEXT:    csel w0, w9, w8, lo
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i8 @llvm.fptoui.sat.i8.f16(half %f)
     ret i8 %x
 }
 
 define i13 @test_unsigned_i13_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i13_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov w9, #8191 // =0x1fff
-; CHECK-CVT-NEXT:    fcvtzu w8, s0
-; CHECK-CVT-NEXT:    cmp w8, w9
-; CHECK-CVT-NEXT:    csel w0, w8, w9, lo
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i13_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu w8, h0
-; CHECK-FP16-NEXT:    mov w9, #8191 // =0x1fff
-; CHECK-FP16-NEXT:    cmp w8, w9
-; CHECK-FP16-NEXT:    csel w0, w8, w9, lo
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i13_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov w9, #8191 // =0x1fff
+; CHECK-SD-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-SD-CVT-NEXT:    cmp w8, w9
+; CHECK-SD-CVT-NEXT:    csel w0, w8, w9, lo
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i13_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu w8, h0
+; CHECK-SD-FP16-NEXT:    mov w9, #8191 // =0x1fff
+; CHECK-SD-FP16-NEXT:    cmp w8, w9
+; CHECK-SD-FP16-NEXT:    csel w0, w8, w9, lo
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i13_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov w9, #8191 // =0x1fff
+; CHECK-GI-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-GI-CVT-NEXT:    cmp w8, w9
+; CHECK-GI-CVT-NEXT:    csel w0, w8, w9, lo
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i13_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu w8, h0
+; CHECK-GI-FP16-NEXT:    mov w9, #8191 // =0x1fff
+; CHECK-GI-FP16-NEXT:    cmp w8, w9
+; CHECK-GI-FP16-NEXT:    csel w0, w8, w9, lo
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i13 @llvm.fptoui.sat.i13.f16(half %f)
     ret i13 %x
 }
 
 define i16 @test_unsigned_i16_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i16_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov w9, #65535 // =0xffff
-; CHECK-CVT-NEXT:    fcvtzu w8, s0
-; CHECK-CVT-NEXT:    cmp w8, w9
-; CHECK-CVT-NEXT:    csel w0, w8, w9, lo
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i16_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu w8, h0
-; CHECK-FP16-NEXT:    mov w9, #65535 // =0xffff
-; CHECK-FP16-NEXT:    cmp w8, w9
-; CHECK-FP16-NEXT:    csel w0, w8, w9, lo
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i16_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov w9, #65535 // =0xffff
+; CHECK-SD-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-SD-CVT-NEXT:    cmp w8, w9
+; CHECK-SD-CVT-NEXT:    csel w0, w8, w9, lo
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i16_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu w8, h0
+; CHECK-SD-FP16-NEXT:    mov w9, #65535 // =0xffff
+; CHECK-SD-FP16-NEXT:    cmp w8, w9
+; CHECK-SD-FP16-NEXT:    csel w0, w8, w9, lo
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i16_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov w9, #65535 // =0xffff
+; CHECK-GI-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-GI-CVT-NEXT:    cmp w8, w9
+; CHECK-GI-CVT-NEXT:    csel w0, w8, w9, lo
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i16_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu w8, h0
+; CHECK-GI-FP16-NEXT:    mov w9, #65535 // =0xffff
+; CHECK-GI-FP16-NEXT:    cmp w8, w9
+; CHECK-GI-FP16-NEXT:    csel w0, w8, w9, lo
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i16 @llvm.fptoui.sat.i16.f16(half %f)
     ret i16 %x
 }
 
 define i19 @test_unsigned_i19_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i19_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov w9, #524287 // =0x7ffff
-; CHECK-CVT-NEXT:    fcvtzu w8, s0
-; CHECK-CVT-NEXT:    cmp w8, w9
-; CHECK-CVT-NEXT:    csel w0, w8, w9, lo
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i19_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu w8, h0
-; CHECK-FP16-NEXT:    mov w9, #524287 // =0x7ffff
-; CHECK-FP16-NEXT:    cmp w8, w9
-; CHECK-FP16-NEXT:    csel w0, w8, w9, lo
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i19_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov w9, #524287 // =0x7ffff
+; CHECK-SD-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-SD-CVT-NEXT:    cmp w8, w9
+; CHECK-SD-CVT-NEXT:    csel w0, w8, w9, lo
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i19_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu w8, h0
+; CHECK-SD-FP16-NEXT:    mov w9, #524287 // =0x7ffff
+; CHECK-SD-FP16-NEXT:    cmp w8, w9
+; CHECK-SD-FP16-NEXT:    csel w0, w8, w9, lo
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i19_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov w9, #524287 // =0x7ffff
+; CHECK-GI-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-GI-CVT-NEXT:    cmp w8, w9
+; CHECK-GI-CVT-NEXT:    csel w0, w8, w9, lo
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i19_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu w8, h0
+; CHECK-GI-FP16-NEXT:    mov w9, #524287 // =0x7ffff
+; CHECK-GI-FP16-NEXT:    cmp w8, w9
+; CHECK-GI-FP16-NEXT:    csel w0, w8, w9, lo
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i19 @llvm.fptoui.sat.i19.f16(half %f)
     ret i19 %x
 }
 
 define i32 @test_unsigned_i32_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i32_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzu w0, s0
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i32_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu w0, h0
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i32_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvtzu w0, s0
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i32_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu w0, h0
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i32_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzu w0, s0
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i32_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu w0, h0
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i32 @llvm.fptoui.sat.i32.f16(half %f)
     ret i32 %x
 }
 
 define i50 @test_unsigned_i50_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i50_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov x9, #1125899906842623 // =0x3ffffffffffff
-; CHECK-CVT-NEXT:    fcvtzu x8, s0
-; CHECK-CVT-NEXT:    cmp x8, x9
-; CHECK-CVT-NEXT:    csel x0, x8, x9, lo
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i50_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu x8, h0
-; CHECK-FP16-NEXT:    mov x9, #1125899906842623 // =0x3ffffffffffff
-; CHECK-FP16-NEXT:    cmp x8, x9
-; CHECK-FP16-NEXT:    csel x0, x8, x9, lo
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i50_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov x9, #1125899906842623 // =0x3ffffffffffff
+; CHECK-SD-CVT-NEXT:    fcvtzu x8, s0
+; CHECK-SD-CVT-NEXT:    cmp x8, x9
+; CHECK-SD-CVT-NEXT:    csel x0, x8, x9, lo
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i50_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu x8, h0
+; CHECK-SD-FP16-NEXT:    mov x9, #1125899906842623 // =0x3ffffffffffff
+; CHECK-SD-FP16-NEXT:    cmp x8, x9
+; CHECK-SD-FP16-NEXT:    csel x0, x8, x9, lo
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i50_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov x9, #1125899906842623 // =0x3ffffffffffff
+; CHECK-GI-CVT-NEXT:    fcvtzu x8, s0
+; CHECK-GI-CVT-NEXT:    cmp x8, x9
+; CHECK-GI-CVT-NEXT:    csel x0, x8, x9, lo
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i50_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu x8, h0
+; CHECK-GI-FP16-NEXT:    mov x9, #1125899906842623 // =0x3ffffffffffff
+; CHECK-GI-FP16-NEXT:    cmp x8, x9
+; CHECK-GI-FP16-NEXT:    csel x0, x8, x9, lo
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i50 @llvm.fptoui.sat.i50.f16(half %f)
     ret i50 %x
 }
 
 define i64 @test_unsigned_i64_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i64_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzu x0, s0
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i64_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu x0, h0
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i64_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvtzu x0, s0
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i64_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu x0, h0
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i64_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzu x0, s0
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i64_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu x0, h0
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i64 @llvm.fptoui.sat.i64.f16(half %f)
     ret i64 %x
 }
 
 define i100 @test_unsigned_i100_f16(half %f) nounwind {
-; CHECK-LABEL: test_unsigned_i100_f16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    mov w8, #1904214015 // =0x717fffff
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov x10, #68719476735 // =0xfffffffff
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    csel x1, x10, x9, gt
-; CHECK-NEXT:    csinv x0, x8, xzr, le
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_i100_f16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    mov w8, #1904214015 // =0x717fffff
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov x10, #68719476735 // =0xfffffffff
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    csel x1, x10, x9, gt
+; CHECK-SD-NEXT:    csinv x0, x8, xzr, le
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i100_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov x1, xzr
+; CHECK-GI-CVT-NEXT:    fcvtzu x0, s0
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i100_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu x0, h0
+; CHECK-GI-FP16-NEXT:    mov x1, xzr
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i100 @llvm.fptoui.sat.i100.f16(half %f)
     ret i100 %x
 }
 
 define i128 @test_unsigned_i128_f16(half %f) nounwind {
-; CHECK-LABEL: test_unsigned_i128_f16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    mov w8, #2139095039 // =0x7f7fffff
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    csinv x0, x9, xzr, le
-; CHECK-NEXT:    csinv x1, x8, xzr, le
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_i128_f16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    mov w8, #2139095039 // =0x7f7fffff
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    csinv x0, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x1, x8, xzr, le
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i128_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov x1, xzr
+; CHECK-GI-CVT-NEXT:    fcvtzu x0, s0
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i128_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu x0, h0
+; CHECK-GI-FP16-NEXT:    mov x1, xzr
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i128 @llvm.fptoui.sat.i128.f16(half %f)
     ret i128 %x
 }
+
+define i32 @test_unsigned_f128_i32(fp128 %f) {
+; CHECK-SD-LABEL: test_unsigned_f128_i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #32
+; CHECK-SD-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    adrp x8, .LCPI30_0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI30_0]
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixunstfsi
+; CHECK-SD-NEXT:    adrp x8, .LCPI30_1
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI30_1]
+; CHECK-SD-NEXT:    csel w19, wzr, w0, lt
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csinv w0, w19, wzr, le
+; CHECK-SD-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    add sp, sp, #32
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_f128_i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #64
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    .cfi_offset b8, -24
+; CHECK-GI-NEXT:    .cfi_offset b9, -32
+; CHECK-GI-NEXT:    adrp x8, .LCPI30_1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI30_1]
+; CHECK-GI-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldp q3, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d0, v3.d[1]
+; CHECK-GI-NEXT:    mov d1, v2.d[1]
+; CHECK-GI-NEXT:    fcsel d8, d3, d2, lt
+; CHECK-GI-NEXT:    fmov x8, d8
+; CHECK-GI-NEXT:    fcsel d9, d0, d1, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d9
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    adrp x8, .LCPI30_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI30_0]
+; CHECK-GI-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    mov d0, v1.d[1]
+; CHECK-GI-NEXT:    fcsel d1, d8, d1, gt
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    fcsel d2, d9, d0, gt
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d2
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    add sp, sp, #64
+; CHECK-GI-NEXT:    b __fixunstfsi
+    %x = call i32 @llvm.fptoui.sat.i32.f128(fp128 %f)
+    ret i32 %x
+}
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index a3b94bcf18ab4b..40a865338cd85d 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -1,6 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
+; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
+
+; CHECK-GI:       warning: Instruction selection used fallback path for test_unsigned_v4f32_v4i50
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_unsigned_v4f16_v4i50
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_unsigned_v8f16_v8i19
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_unsigned_v8f16_v8i50
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_unsigned_v8f16_v8i128
 
 ;
 ; Float to unsigned 32-bit -- Vector size variation
@@ -16,10 +24,17 @@ declare <7 x i32> @llvm.fptoui.sat.v7f32.v7i32 (<7 x float>)
 declare <8 x i32> @llvm.fptoui.sat.v8f32.v8i32 (<8 x float>)
 
 define <1 x i32> @test_unsigned_v1f32_v1i32(<1 x float> %f) {
-; CHECK-LABEL: test_unsigned_v1f32_v1i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu v0.2s, v0.2s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v1f32_v1i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu v0.2s, v0.2s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v1f32_v1i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzu w8, s0
+; CHECK-GI-NEXT:    mov v0.s[0], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptoui.sat.v1f32.v1i32(<1 x float> %f)
     ret <1 x i32> %x
 }
@@ -52,79 +67,157 @@ define <4 x i32> @test_unsigned_v4f32_v4i32(<4 x float> %f) {
 }
 
 define <5 x i32> @test_unsigned_v5f32_v5i32(<5 x float> %f) {
-; CHECK-LABEL: test_unsigned_v5f32_v5i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-NEXT:    // kill: def $s3 killed $s3 def $q3
-; CHECK-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-NEXT:    fcvtzu v4.4s, v4.4s
-; CHECK-NEXT:    mov v0.s[2], v2.s[0]
-; CHECK-NEXT:    fmov w4, s4
-; CHECK-NEXT:    mov v0.s[3], v3.s[0]
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    mov w1, v0.s[1]
-; CHECK-NEXT:    mov w2, v0.s[2]
-; CHECK-NEXT:    mov w3, v0.s[3]
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v5f32_v5i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    fcvtzu v4.4s, v4.4s
+; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-SD-NEXT:    fmov w4, s4
+; CHECK-SD-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    mov w1, v0.s[1]
+; CHECK-SD-NEXT:    mov w2, v0.s[2]
+; CHECK-SD-NEXT:    mov w3, v0.s[3]
+; CHECK-SD-NEXT:    fmov w0, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v5f32_v5i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v4.4s
+; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT:    fmov w4, s1
+; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    mov s2, v0.s[1]
+; CHECK-GI-NEXT:    mov s3, v0.s[2]
+; CHECK-GI-NEXT:    mov s4, v0.s[3]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    fmov w1, s2
+; CHECK-GI-NEXT:    fmov w2, s3
+; CHECK-GI-NEXT:    fmov w3, s4
+; CHECK-GI-NEXT:    ret
     %x = call <5 x i32> @llvm.fptoui.sat.v5f32.v5i32(<5 x float> %f)
     ret <5 x i32> %x
 }
 
 define <6 x i32> @test_unsigned_v6f32_v6i32(<6 x float> %f) {
-; CHECK-LABEL: test_unsigned_v6f32_v6i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-NEXT:    // kill: def $s3 killed $s3 def $q3
-; CHECK-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-NEXT:    mov v4.s[1], v5.s[0]
-; CHECK-NEXT:    mov v0.s[2], v2.s[0]
-; CHECK-NEXT:    fcvtzu v1.4s, v4.4s
-; CHECK-NEXT:    mov v0.s[3], v3.s[0]
-; CHECK-NEXT:    mov w5, v1.s[1]
-; CHECK-NEXT:    fmov w4, s1
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    mov w1, v0.s[1]
-; CHECK-NEXT:    mov w2, v0.s[2]
-; CHECK-NEXT:    mov w3, v0.s[3]
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v6f32_v6i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-SD-NEXT:    fcvtzu v1.4s, v4.4s
+; CHECK-SD-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-SD-NEXT:    mov w5, v1.s[1]
+; CHECK-SD-NEXT:    fmov w4, s1
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    mov w1, v0.s[1]
+; CHECK-SD-NEXT:    mov w2, v0.s[2]
+; CHECK-SD-NEXT:    mov w3, v0.s[3]
+; CHECK-SD-NEXT:    fmov w0, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v6f32_v6i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v4.4s
+; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-GI-NEXT:    mov s4, v1.s[1]
+; CHECK-GI-NEXT:    fmov w4, s1
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    fmov w5, s4
+; CHECK-GI-NEXT:    mov s2, v0.s[1]
+; CHECK-GI-NEXT:    mov s3, v0.s[2]
+; CHECK-GI-NEXT:    mov s5, v0.s[3]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    fmov w1, s2
+; CHECK-GI-NEXT:    fmov w2, s3
+; CHECK-GI-NEXT:    fmov w3, s5
+; CHECK-GI-NEXT:    ret
     %x = call <6 x i32> @llvm.fptoui.sat.v6f32.v6i32(<6 x float> %f)
     ret <6 x i32> %x
 }
 
 define <7 x i32> @test_unsigned_v7f32_v7i32(<7 x float> %f) {
-; CHECK-LABEL: test_unsigned_v7f32_v7i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-NEXT:    // kill: def $s6 killed $s6 def $q6
-; CHECK-NEXT:    // kill: def $s3 killed $s3 def $q3
-; CHECK-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-NEXT:    mov v4.s[1], v5.s[0]
-; CHECK-NEXT:    mov v0.s[2], v2.s[0]
-; CHECK-NEXT:    mov v4.s[2], v6.s[0]
-; CHECK-NEXT:    mov v0.s[3], v3.s[0]
-; CHECK-NEXT:    fcvtzu v1.4s, v4.4s
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    mov w5, v1.s[1]
-; CHECK-NEXT:    mov w6, v1.s[2]
-; CHECK-NEXT:    fmov w4, s1
-; CHECK-NEXT:    mov w1, v0.s[1]
-; CHECK-NEXT:    mov w2, v0.s[2]
-; CHECK-NEXT:    mov w3, v0.s[3]
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v7f32_v7i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-SD-NEXT:    // kill: def $s6 killed $s6 def $q6
+; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-SD-NEXT:    mov v4.s[2], v6.s[0]
+; CHECK-SD-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-SD-NEXT:    fcvtzu v1.4s, v4.4s
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    mov w5, v1.s[1]
+; CHECK-SD-NEXT:    mov w6, v1.s[2]
+; CHECK-SD-NEXT:    fmov w4, s1
+; CHECK-SD-NEXT:    mov w1, v0.s[1]
+; CHECK-SD-NEXT:    mov w2, v0.s[2]
+; CHECK-SD-NEXT:    mov w3, v0.s[3]
+; CHECK-SD-NEXT:    fmov w0, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v7f32_v7i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-GI-NEXT:    // kill: def $s6 killed $s6 def $q6
+; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT:    mov v4.s[2], v6.s[0]
+; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v4.4s
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    mov s5, v1.s[1]
+; CHECK-GI-NEXT:    mov s6, v1.s[2]
+; CHECK-GI-NEXT:    fmov w4, s1
+; CHECK-GI-NEXT:    mov s2, v0.s[1]
+; CHECK-GI-NEXT:    mov s3, v0.s[2]
+; CHECK-GI-NEXT:    mov s4, v0.s[3]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    fmov w5, s5
+; CHECK-GI-NEXT:    fmov w6, s6
+; CHECK-GI-NEXT:    fmov w1, s2
+; CHECK-GI-NEXT:    fmov w2, s3
+; CHECK-GI-NEXT:    fmov w3, s4
+; CHECK-GI-NEXT:    ret
     %x = call <7 x i32> @llvm.fptoui.sat.v7f32.v7i32(<7 x float> %f)
     ret <7 x i32> %x
 }
@@ -151,86 +244,201 @@ declare <5 x i32> @llvm.fptoui.sat.v5f64.v5i32 (<5 x double>)
 declare <6 x i32> @llvm.fptoui.sat.v6f64.v6i32 (<6 x double>)
 
 define <1 x i32> @test_unsigned_v1f64_v1i32(<1 x double> %f) {
-; CHECK-LABEL: test_unsigned_v1f64_v1i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu w8, d0
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v1f64_v1i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu w8, d0
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v1f64_v1i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzu w8, d0
+; CHECK-GI-NEXT:    mov v0.s[0], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptoui.sat.v1f64.v1i32(<1 x double> %f)
     ret <1 x i32> %x
 }
 
 define <2 x i32> @test_unsigned_v2f64_v2i32(<2 x double> %f) {
-; CHECK-LABEL: test_unsigned_v2f64_v2i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    fcvtzu w8, d0
-; CHECK-NEXT:    fcvtzu w9, d1
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v2f64_v2i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d1, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzu w8, d0
+; CHECK-SD-NEXT:    fcvtzu w9, d1
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v2f64_v2i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2d, #0x000000ffffffff
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    cmhi v2.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i32> @llvm.fptoui.sat.v2f64.v2i32(<2 x double> %f)
     ret <2 x i32> %x
 }
 
 define <3 x i32> @test_unsigned_v3f64_v3i32(<3 x double> %f) {
-; CHECK-LABEL: test_unsigned_v3f64_v3i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu w8, d0
-; CHECK-NEXT:    fcvtzu w9, d1
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    fcvtzu w8, d2
-; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    mov v0.s[2], w8
-; CHECK-NEXT:    fcvtzu w8, d0
-; CHECK-NEXT:    mov v0.s[3], w8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v3f64_v3i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu w8, d0
+; CHECK-SD-NEXT:    fcvtzu w9, d1
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    fcvtzu w8, d2
+; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    mov v0.s[2], w8
+; CHECK-SD-NEXT:    fcvtzu w8, d0
+; CHECK-SD-NEXT:    mov v0.s[3], w8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v3f64_v3i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v2.2d
+; CHECK-GI-NEXT:    movi v2.2d, #0x000000ffffffff
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    cmhi v4.2d, v2.2d, v1.2d
+; CHECK-GI-NEXT:    bif v1.16b, v2.16b, v4.16b
+; CHECK-GI-NEXT:    cmhi v3.2d, v2.2d, v0.2d
+; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v3.16b
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    ret
     %x = call <3 x i32> @llvm.fptoui.sat.v3f64.v3i32(<3 x double> %f)
     ret <3 x i32> %x
 }
 
 define <4 x i32> @test_unsigned_v4f64_v4i32(<4 x double> %f) {
-; CHECK-LABEL: test_unsigned_v4f64_v4i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d2, v0.d[1]
-; CHECK-NEXT:    fcvtzu w8, d0
-; CHECK-NEXT:    fcvtzu w9, d2
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    fcvtzu w8, d1
-; CHECK-NEXT:    mov d1, v1.d[1]
-; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    mov v0.s[2], w8
-; CHECK-NEXT:    fcvtzu w8, d1
-; CHECK-NEXT:    mov v0.s[3], w8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v4f64_v4i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d2, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzu w8, d0
+; CHECK-SD-NEXT:    fcvtzu w9, d2
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    fcvtzu w8, d1
+; CHECK-SD-NEXT:    mov d1, v1.d[1]
+; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    mov v0.s[2], w8
+; CHECK-SD-NEXT:    fcvtzu w8, d1
+; CHECK-SD-NEXT:    mov v0.s[3], w8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v4f64_v4i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v2.2d, #0x000000ffffffff
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    cmhi v3.2d, v2.2d, v0.2d
+; CHECK-GI-NEXT:    cmhi v4.2d, v2.2d, v1.2d
+; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v3.16b
+; CHECK-GI-NEXT:    bif v1.16b, v2.16b, v4.16b
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    ret
     %x = call <4 x i32> @llvm.fptoui.sat.v4f64.v4i32(<4 x double> %f)
     ret <4 x i32> %x
 }
 
 define <5 x i32> @test_unsigned_v5f64_v5i32(<5 x double> %f) {
-; CHECK-LABEL: test_unsigned_v5f64_v5i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu w0, d0
-; CHECK-NEXT:    fcvtzu w1, d1
-; CHECK-NEXT:    fcvtzu w2, d2
-; CHECK-NEXT:    fcvtzu w3, d3
-; CHECK-NEXT:    fcvtzu w4, d4
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v5f64_v5i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu w0, d0
+; CHECK-SD-NEXT:    fcvtzu w1, d1
+; CHECK-SD-NEXT:    fcvtzu w2, d2
+; CHECK-SD-NEXT:    fcvtzu w3, d3
+; CHECK-SD-NEXT:    fcvtzu w4, d4
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v5f64_v5i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
+; CHECK-GI-NEXT:    movi v1.2d, #0x000000ffffffff
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    cmhi v4.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    cmhi v5.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v4.16b
+; CHECK-GI-NEXT:    bif v2.16b, v1.16b, v5.16b
+; CHECK-GI-NEXT:    cmhi v4.2d, v1.2d, v3.2d
+; CHECK-GI-NEXT:    bit v1.16b, v3.16b, v4.16b
+; CHECK-GI-NEXT:    mov d3, v0.d[1]
+; CHECK-GI-NEXT:    mov d4, v2.d[1]
+; CHECK-GI-NEXT:    fmov x0, d0
+; CHECK-GI-NEXT:    fmov x2, d2
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
+; CHECK-GI-NEXT:    fmov x4, d1
+; CHECK-GI-NEXT:    fmov x1, d3
+; CHECK-GI-NEXT:    fmov x3, d4
+; CHECK-GI-NEXT:    // kill: def $w4 killed $w4 killed $x4
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
+; CHECK-GI-NEXT:    // kill: def $w3 killed $w3 killed $x3
+; CHECK-GI-NEXT:    ret
     %x = call <5 x i32> @llvm.fptoui.sat.v5f64.v5i32(<5 x double> %f)
     ret <5 x i32> %x
 }
 
 define <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) {
-; CHECK-LABEL: test_unsigned_v6f64_v6i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu w0, d0
-; CHECK-NEXT:    fcvtzu w1, d1
-; CHECK-NEXT:    fcvtzu w2, d2
-; CHECK-NEXT:    fcvtzu w3, d3
-; CHECK-NEXT:    fcvtzu w4, d4
-; CHECK-NEXT:    fcvtzu w5, d5
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v6f64_v6i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu w0, d0
+; CHECK-SD-NEXT:    fcvtzu w1, d1
+; CHECK-SD-NEXT:    fcvtzu w2, d2
+; CHECK-SD-NEXT:    fcvtzu w3, d3
+; CHECK-SD-NEXT:    fcvtzu w4, d4
+; CHECK-SD-NEXT:    fcvtzu w5, d5
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v6f64_v6i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
+; CHECK-GI-NEXT:    mov v4.d[1], v5.d[0]
+; CHECK-GI-NEXT:    movi v1.2d, #0x000000ffffffff
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v4.2d
+; CHECK-GI-NEXT:    cmhi v4.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    cmhi v5.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT:    cmhi v6.2d, v1.2d, v3.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v4.16b
+; CHECK-GI-NEXT:    bif v2.16b, v1.16b, v5.16b
+; CHECK-GI-NEXT:    bit v1.16b, v3.16b, v6.16b
+; CHECK-GI-NEXT:    mov d3, v0.d[1]
+; CHECK-GI-NEXT:    mov d4, v2.d[1]
+; CHECK-GI-NEXT:    mov d5, v1.d[1]
+; CHECK-GI-NEXT:    fmov x0, d0
+; CHECK-GI-NEXT:    fmov x2, d2
+; CHECK-GI-NEXT:    fmov x4, d1
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
+; CHECK-GI-NEXT:    // kill: def $w4 killed $w4 killed $x4
+; CHECK-GI-NEXT:    fmov x1, d3
+; CHECK-GI-NEXT:    fmov x3, d4
+; CHECK-GI-NEXT:    fmov x5, d5
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
+; CHECK-GI-NEXT:    // kill: def $w3 killed $w3 killed $x3
+; CHECK-GI-NEXT:    // kill: def $w5 killed $w5 killed $x5
+; CHECK-GI-NEXT:    ret
     %x = call <6 x i32> @llvm.fptoui.sat.v6f64.v6i32(<6 x double> %f)
     ret <6 x i32> %x
 }
@@ -245,237 +453,592 @@ declare <3 x i32> @llvm.fptoui.sat.v3f128.v3i32 (<3 x fp128>)
 declare <4 x i32> @llvm.fptoui.sat.v4f128.v4i32 (<4 x fp128>)
 
 define <1 x i32> @test_unsigned_v1f128_v1i32(<1 x fp128> %f) {
-; CHECK-LABEL: test_unsigned_v1f128_v1i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    adrp x8, .LCPI14_0
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixunstfsi
-; CHECK-NEXT:    adrp x8, .LCPI14_1
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI14_1]
-; CHECK-NEXT:    csel w19, wzr, w0, lt
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csinv w8, w19, wzr, le
-; CHECK-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    add sp, sp, #32
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v1f128_v1i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #32
+; CHECK-SD-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    adrp x8, .LCPI14_0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixunstfsi
+; CHECK-SD-NEXT:    adrp x8, .LCPI14_1
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI14_1]
+; CHECK-SD-NEXT:    csel w19, wzr, w0, lt
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csinv w8, w19, wzr, le
+; CHECK-SD-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    add sp, sp, #32
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v1f128_v1i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #64
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    .cfi_offset b8, -24
+; CHECK-GI-NEXT:    .cfi_offset b9, -32
+; CHECK-GI-NEXT:    adrp x8, .LCPI14_1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI14_1]
+; CHECK-GI-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldp q3, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d0, v3.d[1]
+; CHECK-GI-NEXT:    mov d1, v2.d[1]
+; CHECK-GI-NEXT:    fcsel d8, d3, d2, lt
+; CHECK-GI-NEXT:    fmov x8, d8
+; CHECK-GI-NEXT:    fcsel d9, d0, d1, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d9
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    adrp x8, .LCPI14_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
+; CHECK-GI-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d0, v1.d[1]
+; CHECK-GI-NEXT:    fcsel d1, d8, d1, gt
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    fcsel d2, d9, d0, gt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d2
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixunstfsi
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v0.s[0], w0
+; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    add sp, sp, #64
+; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptoui.sat.v1f128.v1i32(<1 x fp128> %f)
     ret <1 x i32> %x
 }
 
 define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) {
-; CHECK-LABEL: test_unsigned_v2f128_v2i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #96
-; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w30, -32
-; CHECK-NEXT:    mov v2.16b, v1.16b
-; CHECK-NEXT:    stp q1, q0, [sp, #32] // 32-byte Folded Spill
-; CHECK-NEXT:    adrp x8, .LCPI15_0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI15_0]
-; CHECK-NEXT:    mov v0.16b, v2.16b
-; CHECK-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixunstfsi
-; CHECK-NEXT:    adrp x8, .LCPI15_1
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI15_1]
-; CHECK-NEXT:    csel w19, wzr, w0, lt
-; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csinv w20, w19, wzr, le
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixunstfsi
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    csel w19, wzr, w0, lt
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-NEXT:    csinv w8, w19, wzr, le
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w20
-; CHECK-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    add sp, sp, #96
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v2f128_v2i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #96
+; CHECK-SD-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w30, -32
+; CHECK-SD-NEXT:    mov v2.16b, v1.16b
+; CHECK-SD-NEXT:    stp q1, q0, [sp, #32] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    adrp x8, .LCPI15_0
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI15_0]
+; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixunstfsi
+; CHECK-SD-NEXT:    adrp x8, .LCPI15_1
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI15_1]
+; CHECK-SD-NEXT:    csel w19, wzr, w0, lt
+; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csinv w20, w19, wzr, le
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixunstfsi
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    csel w19, wzr, w0, lt
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csinv w8, w19, wzr, le
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w20
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    add sp, sp, #96
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v2f128_v2i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #96
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    .cfi_offset b8, -24
+; CHECK-GI-NEXT:    .cfi_offset b9, -32
+; CHECK-GI-NEXT:    .cfi_offset b10, -40
+; CHECK-GI-NEXT:    .cfi_offset b11, -48
+; CHECK-GI-NEXT:    adrp x8, .LCPI15_1
+; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI15_1]
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    mov v1.16b, v2.16b
+; CHECK-GI-NEXT:    str q2, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d0, v2.d[1]
+; CHECK-GI-NEXT:    mov d8, v1.d[1]
+; CHECK-GI-NEXT:    fcsel d9, d2, d1, lt
+; CHECK-GI-NEXT:    fmov x8, d9
+; CHECK-GI-NEXT:    fcsel d10, d0, d8, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d10
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    adrp x8, .LCPI15_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI15_0]
+; CHECK-GI-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d11, v0.d[1]
+; CHECK-GI-NEXT:    fcsel d0, d9, d0, gt
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fcsel d1, d10, d11, gt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixunstfsi
+; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w19, w0
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldp q3, q1, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov d0, v3.d[1]
+; CHECK-GI-NEXT:    fcsel d9, d3, d2, lt
+; CHECK-GI-NEXT:    fmov x8, d9
+; CHECK-GI-NEXT:    fcsel d8, d0, d8, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d8
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    fcsel d1, d8, d11, gt
+; CHECK-GI-NEXT:    fcsel d0, d9, d0, gt
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixunstfsi
+; CHECK-GI-NEXT:    mov v0.s[0], w19
+; CHECK-GI-NEXT:    ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v0.s[1], w0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    add sp, sp, #96
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i32> @llvm.fptoui.sat.v2f128.v2i32(<2 x fp128> %f)
     ret <2 x i32> %x
 }
 
 define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
-; CHECK-LABEL: test_unsigned_v3f128_v3i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #112
-; CHECK-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w30, -32
-; CHECK-NEXT:    stp q0, q2, [sp, #48] // 32-byte Folded Spill
-; CHECK-NEXT:    mov v2.16b, v1.16b
-; CHECK-NEXT:    adrp x8, .LCPI16_0
-; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI16_0]
-; CHECK-NEXT:    mov v0.16b, v2.16b
-; CHECK-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixunstfsi
-; CHECK-NEXT:    adrp x8, .LCPI16_1
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI16_1]
-; CHECK-NEXT:    csel w19, wzr, w0, lt
-; CHECK-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    ldp q1, q0, [sp, #32] // 32-byte Folded Reload
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csinv w20, w19, wzr, le
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixunstfsi
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    csel w19, wzr, w0, lt
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv w8, w19, wzr, le
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w20
-; CHECK-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixunstfsi
-; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    csel w19, wzr, w0, lt
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
-; CHECK-NEXT:    csinv w8, w19, wzr, le
-; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    mov v0.s[2], w8
-; CHECK-NEXT:    add sp, sp, #112
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v3f128_v3i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #112
+; CHECK-SD-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w30, -32
+; CHECK-SD-NEXT:    stp q0, q2, [sp, #48] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    mov v2.16b, v1.16b
+; CHECK-SD-NEXT:    adrp x8, .LCPI16_0
+; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI16_0]
+; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixunstfsi
+; CHECK-SD-NEXT:    adrp x8, .LCPI16_1
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI16_1]
+; CHECK-SD-NEXT:    csel w19, wzr, w0, lt
+; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    ldp q1, q0, [sp, #32] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csinv w20, w19, wzr, le
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixunstfsi
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    csel w19, wzr, w0, lt
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csinv w8, w19, wzr, le
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w20
+; CHECK-SD-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixunstfsi
+; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    csel w19, wzr, w0, lt
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csinv w8, w19, wzr, le
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v0.s[2], w8
+; CHECK-SD-NEXT:    add sp, sp, #112
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v3f128_v3i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #128
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w30, -32
+; CHECK-GI-NEXT:    .cfi_offset b8, -40
+; CHECK-GI-NEXT:    .cfi_offset b9, -48
+; CHECK-GI-NEXT:    .cfi_offset b10, -56
+; CHECK-GI-NEXT:    .cfi_offset b11, -64
+; CHECK-GI-NEXT:    adrp x8, .LCPI16_1
+; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI16_1]
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str q2, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldp q2, q1, [sp, #32] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d0, v2.d[1]
+; CHECK-GI-NEXT:    mov d8, v1.d[1]
+; CHECK-GI-NEXT:    fcsel d10, d2, d1, lt
+; CHECK-GI-NEXT:    fmov x8, d10
+; CHECK-GI-NEXT:    fcsel d11, d0, d8, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d11
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    adrp x8, .LCPI16_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI16_0]
+; CHECK-GI-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d9, v0.d[1]
+; CHECK-GI-NEXT:    fcsel d0, d10, d0, gt
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fcsel d1, d11, d9, gt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixunstfsi
+; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w19, w0
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldp q1, q3, [sp, #32] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov d0, v2.d[1]
+; CHECK-GI-NEXT:    fcsel d10, d2, d3, lt
+; CHECK-GI-NEXT:    fmov x8, d10
+; CHECK-GI-NEXT:    fcsel d11, d0, d8, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d11
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    fcsel d1, d11, d9, gt
+; CHECK-GI-NEXT:    fcsel d0, d10, d0, gt
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixunstfsi
+; CHECK-GI-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w20, w0
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldp q4, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    ldr q2, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov d0, v4.d[1]
+; CHECK-GI-NEXT:    fcsel d10, d4, d2, lt
+; CHECK-GI-NEXT:    fmov x8, d10
+; CHECK-GI-NEXT:    fcsel d8, d0, d8, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d8
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    fcsel d1, d8, d9, gt
+; CHECK-GI-NEXT:    fcsel d0, d10, d0, gt
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixunstfsi
+; CHECK-GI-NEXT:    mov v0.s[0], w19
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    mov v0.s[1], w20
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v0.s[2], w0
+; CHECK-GI-NEXT:    add sp, sp, #128
+; CHECK-GI-NEXT:    ret
     %x = call <3 x i32> @llvm.fptoui.sat.v3f128.v3i32(<3 x fp128> %f)
     ret <3 x i32> %x
 }
 
 define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) {
-; CHECK-LABEL: test_unsigned_v4f128_v4i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #128
-; CHECK-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 128
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w30, -32
-; CHECK-NEXT:    stp q0, q2, [sp, #16] // 32-byte Folded Spill
-; CHECK-NEXT:    mov v2.16b, v1.16b
-; CHECK-NEXT:    adrp x8, .LCPI17_0
-; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI17_0]
-; CHECK-NEXT:    str q3, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    mov v0.16b, v2.16b
-; CHECK-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixunstfsi
-; CHECK-NEXT:    adrp x8, .LCPI17_1
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI17_1]
-; CHECK-NEXT:    csel w19, wzr, w0, lt
-; CHECK-NEXT:    str q1, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csinv w20, w19, wzr, le
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixunstfsi
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    csel w19, wzr, w0, lt
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv w8, w19, wzr, le
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w20
-; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixunstfsi
-; CHECK-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    csel w19, wzr, w0, lt
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv w8, w19, wzr, le
-; CHECK-NEXT:    mov v0.s[2], w8
-; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    ldp q1, q0, [sp, #64] // 32-byte Folded Reload
-; CHECK-NEXT:    bl __getf2
-; CHECK-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    bl __fixunstfsi
-; CHECK-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    csel w19, wzr, w0, lt
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
-; CHECK-NEXT:    csinv w8, w19, wzr, le
-; CHECK-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
-; CHECK-NEXT:    mov v0.s[3], w8
-; CHECK-NEXT:    add sp, sp, #128
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v4f128_v4i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #128
+; CHECK-SD-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w30, -32
+; CHECK-SD-NEXT:    stp q0, q2, [sp, #16] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    mov v2.16b, v1.16b
+; CHECK-SD-NEXT:    adrp x8, .LCPI17_0
+; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI17_0]
+; CHECK-SD-NEXT:    str q3, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixunstfsi
+; CHECK-SD-NEXT:    adrp x8, .LCPI17_1
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI17_1]
+; CHECK-SD-NEXT:    csel w19, wzr, w0, lt
+; CHECK-SD-NEXT:    str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csinv w20, w19, wzr, le
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixunstfsi
+; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    csel w19, wzr, w0, lt
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csinv w8, w19, wzr, le
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w20
+; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixunstfsi
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    csel w19, wzr, w0, lt
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csinv w8, w19, wzr, le
+; CHECK-SD-NEXT:    mov v0.s[2], w8
+; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q1, q0, [sp, #64] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixunstfsi
+; CHECK-SD-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    csel w19, wzr, w0, lt
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csinv w8, w19, wzr, le
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v0.s[3], w8
+; CHECK-SD-NEXT:    add sp, sp, #128
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v4f128_v4i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #144
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x30, x21, [sp, #112] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #128] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 144
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w21, -24
+; CHECK-GI-NEXT:    .cfi_offset w30, -32
+; CHECK-GI-NEXT:    .cfi_offset b8, -40
+; CHECK-GI-NEXT:    .cfi_offset b9, -48
+; CHECK-GI-NEXT:    .cfi_offset b10, -56
+; CHECK-GI-NEXT:    .cfi_offset b11, -64
+; CHECK-GI-NEXT:    adrp x8, .LCPI17_1
+; CHECK-GI-NEXT:    stp q1, q2, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI17_1]
+; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str q3, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d0, v2.d[1]
+; CHECK-GI-NEXT:    mov d8, v1.d[1]
+; CHECK-GI-NEXT:    fcsel d10, d2, d1, lt
+; CHECK-GI-NEXT:    fmov x8, d10
+; CHECK-GI-NEXT:    fcsel d11, d0, d8, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d11
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    adrp x8, .LCPI17_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI17_0]
+; CHECK-GI-NEXT:    str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d9, v0.d[1]
+; CHECK-GI-NEXT:    fcsel d0, d10, d0, gt
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fcsel d1, d11, d9, gt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixunstfsi
+; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w19, w0
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldp q1, q4, [sp, #48] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov d0, v2.d[1]
+; CHECK-GI-NEXT:    fcsel d10, d2, d4, lt
+; CHECK-GI-NEXT:    fmov x8, d10
+; CHECK-GI-NEXT:    fcsel d11, d0, d8, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d11
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    fcsel d1, d11, d9, gt
+; CHECK-GI-NEXT:    fcsel d0, d10, d0, gt
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixunstfsi
+; CHECK-GI-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w20, w0
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldp q1, q5, [sp, #48] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    ldr q2, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov d0, v2.d[1]
+; CHECK-GI-NEXT:    fcsel d10, d2, d5, lt
+; CHECK-GI-NEXT:    fmov x8, d10
+; CHECK-GI-NEXT:    fcsel d11, d0, d8, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d11
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    fcsel d1, d11, d9, gt
+; CHECK-GI-NEXT:    fcsel d0, d10, d0, gt
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixunstfsi
+; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w21, w0
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldp q6, q1, [sp, #32] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    ldr q2, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov d0, v6.d[1]
+; CHECK-GI-NEXT:    fcsel d10, d6, d2, lt
+; CHECK-GI-NEXT:    fmov x8, d10
+; CHECK-GI-NEXT:    fcsel d8, d0, d8, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d8
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    fcsel d1, d8, d9, gt
+; CHECK-GI-NEXT:    fcsel d0, d10, d0, gt
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixunstfsi
+; CHECK-GI-NEXT:    mov v0.s[0], w19
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #96] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v0.s[1], w20
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #128] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v0.s[2], w21
+; CHECK-GI-NEXT:    ldp x30, x21, [sp, #112] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v0.s[3], w0
+; CHECK-GI-NEXT:    add sp, sp, #144
+; CHECK-GI-NEXT:    ret
     %x = call <4 x i32> @llvm.fptoui.sat.v4f128.v4i32(<4 x fp128> %f)
     ret <4 x i32> %x
 }
@@ -494,29 +1057,53 @@ declare <7 x i32> @llvm.fptoui.sat.v7f16.v7i32 (<7 x half>)
 declare <8 x i32> @llvm.fptoui.sat.v8f16.v8i32 (<8 x half>)
 
 define <1 x i32> @test_unsigned_v1f16_v1i32(<1 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v1f16_v1i32:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzu w8, s0
-; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v1f16_v1i32:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu w8, h0
-; CHECK-FP16-NEXT:    fmov s0, w8
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_v1f16_v1i32:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-SD-CVT-NEXT:    fmov s0, w8
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_v1f16_v1i32:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu w8, h0
+; CHECK-SD-FP16-NEXT:    fmov s0, w8
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v1f16_v1i32:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-GI-CVT-NEXT:    mov v0.s[0], w8
+; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v1f16_v1i32:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu w8, h0
+; CHECK-GI-FP16-NEXT:    mov v0.s[0], w8
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <1 x i32> @llvm.fptoui.sat.v1f16.v1i32(<1 x half> %f)
     ret <1 x i32> %x
 }
 
 define <2 x i32> @test_unsigned_v2f16_v2i32(<2 x half> %f) {
-; CHECK-LABEL: test_unsigned_v2f16_v2i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v2f16_v2i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v2f16_v2i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NEXT:    fcvtzu v0.2s, v0.2s
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i32> @llvm.fptoui.sat.v2f16.v2i32(<2 x half> %f)
     ret <2 x i32> %x
 }
@@ -542,67 +1129,135 @@ define <4 x i32> @test_unsigned_v4f16_v4i32(<4 x half> %f) {
 }
 
 define <5 x i32> @test_unsigned_v5f16_v5i32(<5 x half> %f) {
-; CHECK-LABEL: test_unsigned_v5f16_v5i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
-; CHECK-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    mov w1, v1.s[1]
-; CHECK-NEXT:    mov w2, v1.s[2]
-; CHECK-NEXT:    mov w3, v1.s[3]
-; CHECK-NEXT:    fmov w0, s1
-; CHECK-NEXT:    fmov w4, s0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v5f16_v5i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-SD-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    mov w1, v1.s[1]
+; CHECK-SD-NEXT:    mov w2, v1.s[2]
+; CHECK-SD-NEXT:    mov w3, v1.s[3]
+; CHECK-SD-NEXT:    fmov w0, s1
+; CHECK-SD-NEXT:    fmov w4, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v5f16_v5i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-GI-NEXT:    mov v0.h[0], v0.h[4]
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NEXT:    mov s2, v1.s[1]
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    mov s3, v1.s[2]
+; CHECK-GI-NEXT:    mov s4, v1.s[3]
+; CHECK-GI-NEXT:    fmov w0, s1
+; CHECK-GI-NEXT:    fmov w1, s2
+; CHECK-GI-NEXT:    fmov w2, s3
+; CHECK-GI-NEXT:    fmov w4, s0
+; CHECK-GI-NEXT:    fmov w3, s4
+; CHECK-GI-NEXT:    ret
     %x = call <5 x i32> @llvm.fptoui.sat.v5f16.v5i32(<5 x half> %f)
     ret <5 x i32> %x
 }
 
 define <6 x i32> @test_unsigned_v6f16_v6i32(<6 x half> %f) {
-; CHECK-LABEL: test_unsigned_v6f16_v6i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
-; CHECK-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    mov w1, v1.s[1]
-; CHECK-NEXT:    mov w2, v1.s[2]
-; CHECK-NEXT:    mov w5, v0.s[1]
-; CHECK-NEXT:    mov w3, v1.s[3]
-; CHECK-NEXT:    fmov w4, s0
-; CHECK-NEXT:    fmov w0, s1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v6f16_v6i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-SD-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    mov w1, v1.s[1]
+; CHECK-SD-NEXT:    mov w2, v1.s[2]
+; CHECK-SD-NEXT:    mov w5, v0.s[1]
+; CHECK-SD-NEXT:    mov w3, v1.s[3]
+; CHECK-SD-NEXT:    fmov w4, s0
+; CHECK-SD-NEXT:    fmov w0, s1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v6f16_v6i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov v1.h[0], v0.h[4]
+; CHECK-GI-NEXT:    mov v1.h[1], v0.h[5]
+; CHECK-GI-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NEXT:    fcvtl v1.4s, v1.4h
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-GI-NEXT:    mov s2, v0.s[1]
+; CHECK-GI-NEXT:    mov s3, v0.s[2]
+; CHECK-GI-NEXT:    mov s4, v0.s[3]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    mov s5, v1.s[1]
+; CHECK-GI-NEXT:    fmov w1, s2
+; CHECK-GI-NEXT:    fmov w2, s3
+; CHECK-GI-NEXT:    fmov w3, s4
+; CHECK-GI-NEXT:    fmov w4, s1
+; CHECK-GI-NEXT:    fmov w5, s5
+; CHECK-GI-NEXT:    ret
     %x = call <6 x i32> @llvm.fptoui.sat.v6f16.v6i32(<6 x half> %f)
     ret <6 x i32> %x
 }
 
 define <7 x i32> @test_unsigned_v7f16_v7i32(<7 x half> %f) {
-; CHECK-LABEL: test_unsigned_v7f16_v7i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
-; CHECK-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    mov w1, v1.s[1]
-; CHECK-NEXT:    mov w2, v1.s[2]
-; CHECK-NEXT:    mov w3, v1.s[3]
-; CHECK-NEXT:    mov w5, v0.s[1]
-; CHECK-NEXT:    mov w6, v0.s[2]
-; CHECK-NEXT:    fmov w0, s1
-; CHECK-NEXT:    fmov w4, s0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v7f16_v7i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-SD-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    mov w1, v1.s[1]
+; CHECK-SD-NEXT:    mov w2, v1.s[2]
+; CHECK-SD-NEXT:    mov w3, v1.s[3]
+; CHECK-SD-NEXT:    mov w5, v0.s[1]
+; CHECK-SD-NEXT:    mov w6, v0.s[2]
+; CHECK-SD-NEXT:    fmov w0, s1
+; CHECK-SD-NEXT:    fmov w4, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v7f16_v7i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov v1.h[0], v0.h[4]
+; CHECK-GI-NEXT:    mov v1.h[1], v0.h[5]
+; CHECK-GI-NEXT:    mov v1.h[2], v0.h[6]
+; CHECK-GI-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NEXT:    fcvtl v1.4s, v1.4h
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-GI-NEXT:    mov s2, v0.s[1]
+; CHECK-GI-NEXT:    mov s3, v0.s[2]
+; CHECK-GI-NEXT:    mov s4, v0.s[3]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    mov s5, v1.s[1]
+; CHECK-GI-NEXT:    mov s6, v1.s[2]
+; CHECK-GI-NEXT:    fmov w1, s2
+; CHECK-GI-NEXT:    fmov w2, s3
+; CHECK-GI-NEXT:    fmov w3, s4
+; CHECK-GI-NEXT:    fmov w4, s1
+; CHECK-GI-NEXT:    fmov w5, s5
+; CHECK-GI-NEXT:    fmov w6, s6
+; CHECK-GI-NEXT:    ret
     %x = call <7 x i32> @llvm.fptoui.sat.v7f16.v7i32(<7 x half> %f)
     ret <7 x i32> %x
 }
 
 define <8 x i32> @test_unsigned_v8f16_v8i32(<8 x half> %f) {
-; CHECK-LABEL: test_unsigned_v8f16_v8i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtl2 v1.4s, v0.8h
-; CHECK-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v8f16_v8i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtl2 v1.4s, v0.8h
+; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v8f16_v8i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-GI-NEXT:    fcvtl2 v2.4s, v0.8h
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v2.4s
+; CHECK-GI-NEXT:    ret
     %x = call <8 x i32> @llvm.fptoui.sat.v8f16.v8i32(<8 x half> %f)
     ret <8 x i32> %x
 }
@@ -686,20 +1341,30 @@ define <2 x i32> @test_unsigned_v2f32_v2i32_duplicate(<2 x float> %f) {
 }
 
 define <2 x i50> @test_unsigned_v2f32_v2i50(<2 x float> %f) {
-; CHECK-LABEL: test_unsigned_v2f32_v2i50:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s1, v0.s[1]
-; CHECK-NEXT:    fcvtzu x9, s0
-; CHECK-NEXT:    mov x10, #1125899906842623 // =0x3ffffffffffff
-; CHECK-NEXT:    fcvtzu x8, s1
-; CHECK-NEXT:    cmp x8, x10
-; CHECK-NEXT:    csel x8, x8, x10, lo
-; CHECK-NEXT:    cmp x9, x10
-; CHECK-NEXT:    csel x9, x9, x10, lo
-; CHECK-NEXT:    fmov d0, x9
-; CHECK-NEXT:    mov v0.d[1], x8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v2f32_v2i50:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    mov s1, v0.s[1]
+; CHECK-SD-NEXT:    fcvtzu x9, s0
+; CHECK-SD-NEXT:    mov x10, #1125899906842623 // =0x3ffffffffffff
+; CHECK-SD-NEXT:    fcvtzu x8, s1
+; CHECK-SD-NEXT:    cmp x8, x10
+; CHECK-SD-NEXT:    csel x8, x8, x10, lo
+; CHECK-SD-NEXT:    cmp x9, x10
+; CHECK-SD-NEXT:    csel x9, x9, x10, lo
+; CHECK-SD-NEXT:    fmov d0, x9
+; CHECK-SD-NEXT:    mov v0.d[1], x8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v2f32_v2i50:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-GI-NEXT:    adrp x8, .LCPI32_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI32_0]
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    cmhi v2.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i50> @llvm.fptoui.sat.v2f32.v2i50(<2 x float> %f)
     ret <2 x i50> %x
 }
@@ -715,95 +1380,181 @@ define <2 x i64> @test_unsigned_v2f32_v2i64(<2 x float> %f) {
 }
 
 define <2 x i100> @test_unsigned_v2f32_v2i100(<2 x float> %f) {
-; CHECK-LABEL: test_unsigned_v2f32_v2i100:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x30, x21, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w30, -32
-; CHECK-NEXT:    .cfi_offset b8, -40
-; CHECK-NEXT:    .cfi_offset b9, -48
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w8, #1904214015 // =0x717fffff
-; CHECK-NEXT:    mov x21, #68719476735 // =0xfffffffff
-; CHECK-NEXT:    fmov s9, w8
-; CHECK-NEXT:    mov s8, v0.s[1]
-; CHECK-NEXT:    fcmp s0, #0.0
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s0, s9
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    csel x19, x21, x9, gt
-; CHECK-NEXT:    csinv x20, x8, xzr, le
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    mov x0, x20
-; CHECK-NEXT:    mov x1, x19
-; CHECK-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x3, x21, x9, gt
-; CHECK-NEXT:    ldp x30, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x2, x8, xzr, le
-; CHECK-NEXT:    add sp, sp, #64
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v2f32_v2i100:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #64
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x30, x21, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w30, -32
+; CHECK-SD-NEXT:    .cfi_offset b8, -40
+; CHECK-SD-NEXT:    .cfi_offset b9, -48
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w8, #1904214015 // =0x717fffff
+; CHECK-SD-NEXT:    mov x21, #68719476735 // =0xfffffffff
+; CHECK-SD-NEXT:    fmov s9, w8
+; CHECK-SD-NEXT:    mov s8, v0.s[1]
+; CHECK-SD-NEXT:    fcmp s0, #0.0
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s0, s9
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    csel x19, x21, x9, gt
+; CHECK-SD-NEXT:    csinv x20, x8, xzr, le
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    mov x0, x20
+; CHECK-SD-NEXT:    mov x1, x19
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x3, x21, x9, gt
+; CHECK-SD-NEXT:    ldp x30, x21, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csinv x2, x8, xzr, le
+; CHECK-SD-NEXT:    add sp, sp, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v2f32_v2i100:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #64
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x30, x21, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w21, -24
+; CHECK-GI-NEXT:    .cfi_offset w30, -32
+; CHECK-GI-NEXT:    .cfi_offset b8, -40
+; CHECK-GI-NEXT:    .cfi_offset b9, -48
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT:    bl __fixunssfti
+; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w8, #1904214015 // =0x717fffff
+; CHECK-GI-NEXT:    mov x21, #68719476735 // =0xfffffffff
+; CHECK-GI-NEXT:    fmov s9, w8
+; CHECK-GI-NEXT:    fcmp s0, #0.0
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp s0, s9
+; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    csinv x19, x8, xzr, le
+; CHECK-GI-NEXT:    csel x20, x21, x9, gt
+; CHECK-GI-NEXT:    bl __fixunssfti
+; CHECK-GI-NEXT:    fcmp s8, #0.0
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp s8, s9
+; CHECK-GI-NEXT:    mov x0, x19
+; CHECK-GI-NEXT:    mov x1, x20
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csel x3, x21, x9, gt
+; CHECK-GI-NEXT:    ldp x30, x21, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csinv x2, x8, xzr, le
+; CHECK-GI-NEXT:    add sp, sp, #64
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i100> @llvm.fptoui.sat.v2f32.v2i100(<2 x float> %f)
     ret <2 x i100> %x
 }
 
 define <2 x i128> @test_unsigned_v2f32_v2i128(<2 x float> %f) {
-; CHECK-LABEL: test_unsigned_v2f32_v2i128:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w30, -32
-; CHECK-NEXT:    .cfi_offset b8, -40
-; CHECK-NEXT:    .cfi_offset b9, -48
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w8, #2139095039 // =0x7f7fffff
-; CHECK-NEXT:    fmov s9, w8
-; CHECK-NEXT:    mov s8, v0.s[1]
-; CHECK-NEXT:    fcmp s0, #0.0
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s0, s9
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    csinv x19, x9, xzr, le
-; CHECK-NEXT:    csinv x20, x8, xzr, le
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    mov x0, x19
-; CHECK-NEXT:    mov x1, x20
-; CHECK-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x2, x9, xzr, le
-; CHECK-NEXT:    csinv x3, x8, xzr, le
-; CHECK-NEXT:    add sp, sp, #64
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v2f32_v2i128:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #64
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w30, -32
+; CHECK-SD-NEXT:    .cfi_offset b8, -40
+; CHECK-SD-NEXT:    .cfi_offset b9, -48
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w8, #2139095039 // =0x7f7fffff
+; CHECK-SD-NEXT:    fmov s9, w8
+; CHECK-SD-NEXT:    mov s8, v0.s[1]
+; CHECK-SD-NEXT:    fcmp s0, #0.0
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s0, s9
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    csinv x19, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x20, x8, xzr, le
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csinv x2, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x3, x8, xzr, le
+; CHECK-SD-NEXT:    add sp, sp, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v2f32_v2i128:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #64
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w30, -32
+; CHECK-GI-NEXT:    .cfi_offset b8, -40
+; CHECK-GI-NEXT:    .cfi_offset b9, -48
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT:    bl __fixunssfti
+; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w8, #2139095039 // =0x7f7fffff
+; CHECK-GI-NEXT:    fmov s9, w8
+; CHECK-GI-NEXT:    fcmp s0, #0.0
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp s0, s9
+; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    csinv x19, x8, xzr, le
+; CHECK-GI-NEXT:    csinv x20, x9, xzr, le
+; CHECK-GI-NEXT:    bl __fixunssfti
+; CHECK-GI-NEXT:    fcmp s8, #0.0
+; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp s8, s9
+; CHECK-GI-NEXT:    mov x0, x19
+; CHECK-GI-NEXT:    mov x1, x20
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csinv x2, x8, xzr, le
+; CHECK-GI-NEXT:    csinv x3, x9, xzr, le
+; CHECK-GI-NEXT:    add sp, sp, #64
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i128> @llvm.fptoui.sat.v2f32.v2i128(<2 x float> %f)
     ret <2 x i128> %x
 }
@@ -859,11 +1610,19 @@ define <4 x i13> @test_unsigned_v4f32_v4i13(<4 x float> %f) {
 }
 
 define <4 x i16> @test_unsigned_v4f32_v4i16(<4 x float> %f) {
-; CHECK-LABEL: test_unsigned_v4f32_v4i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    uqxtn v0.4h, v0.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v4f32_v4i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    uqxtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v4f32_v4i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2d, #0x00ffff0000ffff
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    ret
     %x = call <4 x i16> @llvm.fptoui.sat.v4f32.v4i16(<4 x float> %f)
     ret <4 x i16> %x
 }
@@ -913,171 +1672,331 @@ define <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) {
 }
 
 define <4 x i64> @test_unsigned_v4f32_v4i64(<4 x float> %f) {
-; CHECK-LABEL: test_unsigned_v4f32_v4i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtl2 v1.2d, v0.4s
-; CHECK-NEXT:    fcvtl v0.2d, v0.2s
-; CHECK-NEXT:    fcvtzu v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v4f32_v4i64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtl2 v1.2d, v0.4s
+; CHECK-SD-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-SD-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v4f32_v4i64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtl v1.2d, v0.2s
+; CHECK-GI-NEXT:    fcvtl2 v2.2d, v0.4s
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v2.2d
+; CHECK-GI-NEXT:    ret
     %x = call <4 x i64> @llvm.fptoui.sat.v4f32.v4i64(<4 x float> %f)
     ret <4 x i64> %x
 }
 
 define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
-; CHECK-LABEL: test_unsigned_v4f32_v4i100:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #96
-; CHECK-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x30, x25, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w22, -32
-; CHECK-NEXT:    .cfi_offset w23, -40
-; CHECK-NEXT:    .cfi_offset w24, -48
-; CHECK-NEXT:    .cfi_offset w25, -56
-; CHECK-NEXT:    .cfi_offset w30, -64
-; CHECK-NEXT:    .cfi_offset b8, -72
-; CHECK-NEXT:    .cfi_offset b9, -80
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w8, #1904214015 // =0x717fffff
-; CHECK-NEXT:    mov x25, #68719476735 // =0xfffffffff
-; CHECK-NEXT:    fmov s9, w8
-; CHECK-NEXT:    mov s8, v0.s[1]
-; CHECK-NEXT:    fcmp s0, #0.0
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s0, s9
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    csel x19, x25, x9, gt
-; CHECK-NEXT:    csinv x20, x8, xzr, le
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    csel x21, x25, x9, gt
-; CHECK-NEXT:    csinv x22, x8, xzr, le
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov s8, v0.s[1]
-; CHECK-NEXT:    fcmp s0, #0.0
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s0, s9
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    csel x23, x25, x9, gt
-; CHECK-NEXT:    csinv x24, x8, xzr, le
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov x2, x22
-; CHECK-NEXT:    mov x3, x21
-; CHECK-NEXT:    mov x4, x24
-; CHECK-NEXT:    mov x5, x23
-; CHECK-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    mov x0, x20
-; CHECK-NEXT:    mov x1, x19
-; CHECK-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x7, x25, x9, gt
-; CHECK-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x30, x25, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x6, x8, xzr, le
-; CHECK-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    add sp, sp, #96
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v4f32_v4i100:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #96
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x30, x25, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w23, -40
+; CHECK-SD-NEXT:    .cfi_offset w24, -48
+; CHECK-SD-NEXT:    .cfi_offset w25, -56
+; CHECK-SD-NEXT:    .cfi_offset w30, -64
+; CHECK-SD-NEXT:    .cfi_offset b8, -72
+; CHECK-SD-NEXT:    .cfi_offset b9, -80
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w8, #1904214015 // =0x717fffff
+; CHECK-SD-NEXT:    mov x25, #68719476735 // =0xfffffffff
+; CHECK-SD-NEXT:    fmov s9, w8
+; CHECK-SD-NEXT:    mov s8, v0.s[1]
+; CHECK-SD-NEXT:    fcmp s0, #0.0
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s0, s9
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    csel x19, x25, x9, gt
+; CHECK-SD-NEXT:    csinv x20, x8, xzr, le
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    csel x21, x25, x9, gt
+; CHECK-SD-NEXT:    csinv x22, x8, xzr, le
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov s8, v0.s[1]
+; CHECK-SD-NEXT:    fcmp s0, #0.0
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s0, s9
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    csel x23, x25, x9, gt
+; CHECK-SD-NEXT:    csinv x24, x8, xzr, le
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov x2, x22
+; CHECK-SD-NEXT:    mov x3, x21
+; CHECK-SD-NEXT:    mov x4, x24
+; CHECK-SD-NEXT:    mov x5, x23
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    mov x0, x20
+; CHECK-SD-NEXT:    mov x1, x19
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x7, x25, x9, gt
+; CHECK-SD-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x30, x25, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csinv x6, x8, xzr, le
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    add sp, sp, #96
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v4f32_v4i100:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #112
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x30, x25, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w21, -24
+; CHECK-GI-NEXT:    .cfi_offset w22, -32
+; CHECK-GI-NEXT:    .cfi_offset w23, -40
+; CHECK-GI-NEXT:    .cfi_offset w24, -48
+; CHECK-GI-NEXT:    .cfi_offset w25, -56
+; CHECK-GI-NEXT:    .cfi_offset w30, -64
+; CHECK-GI-NEXT:    .cfi_offset b8, -72
+; CHECK-GI-NEXT:    .cfi_offset b9, -80
+; CHECK-GI-NEXT:    .cfi_offset b10, -88
+; CHECK-GI-NEXT:    .cfi_offset b11, -96
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    mov s9, v0.s[2]
+; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT:    bl __fixunssfti
+; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w8, #1904214015 // =0x717fffff
+; CHECK-GI-NEXT:    mov x25, #68719476735 // =0xfffffffff
+; CHECK-GI-NEXT:    fmov s11, w8
+; CHECK-GI-NEXT:    fcmp s0, #0.0
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp s0, s11
+; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    csinv x19, x8, xzr, le
+; CHECK-GI-NEXT:    csel x20, x25, x9, gt
+; CHECK-GI-NEXT:    bl __fixunssfti
+; CHECK-GI-NEXT:    fcmp s8, #0.0
+; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp s8, s11
+; CHECK-GI-NEXT:    csinv x21, x8, xzr, le
+; CHECK-GI-NEXT:    csel x22, x25, x9, gt
+; CHECK-GI-NEXT:    bl __fixunssfti
+; CHECK-GI-NEXT:    fcmp s9, #0.0
+; CHECK-GI-NEXT:    fmov s0, s10
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp s9, s11
+; CHECK-GI-NEXT:    csinv x23, x8, xzr, le
+; CHECK-GI-NEXT:    csel x24, x25, x9, gt
+; CHECK-GI-NEXT:    bl __fixunssfti
+; CHECK-GI-NEXT:    fcmp s10, #0.0
+; CHECK-GI-NEXT:    mov x2, x21
+; CHECK-GI-NEXT:    mov x3, x22
+; CHECK-GI-NEXT:    mov x4, x23
+; CHECK-GI-NEXT:    mov x5, x24
+; CHECK-GI-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp s10, s11
+; CHECK-GI-NEXT:    mov x0, x19
+; CHECK-GI-NEXT:    mov x1, x20
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csel x7, x25, x9, gt
+; CHECK-GI-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp x30, x25, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csinv x6, x8, xzr, le
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    add sp, sp, #112
+; CHECK-GI-NEXT:    ret
     %x = call <4 x i100> @llvm.fptoui.sat.v4f32.v4i100(<4 x float> %f)
     ret <4 x i100> %x
 }
 
 define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
-; CHECK-LABEL: test_unsigned_v4f32_v4i128:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #96
-; CHECK-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w22, -32
-; CHECK-NEXT:    .cfi_offset w23, -40
-; CHECK-NEXT:    .cfi_offset w24, -48
-; CHECK-NEXT:    .cfi_offset w30, -64
-; CHECK-NEXT:    .cfi_offset b8, -72
-; CHECK-NEXT:    .cfi_offset b9, -80
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w8, #2139095039 // =0x7f7fffff
-; CHECK-NEXT:    fmov s9, w8
-; CHECK-NEXT:    mov s8, v0.s[1]
-; CHECK-NEXT:    fcmp s0, #0.0
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s0, s9
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    csinv x19, x9, xzr, le
-; CHECK-NEXT:    csinv x20, x8, xzr, le
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    csinv x21, x9, xzr, le
-; CHECK-NEXT:    csinv x22, x8, xzr, le
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov s8, v0.s[1]
-; CHECK-NEXT:    fcmp s0, #0.0
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s0, s9
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    csinv x23, x9, xzr, le
-; CHECK-NEXT:    csinv x24, x8, xzr, le
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov x2, x21
-; CHECK-NEXT:    mov x3, x22
-; CHECK-NEXT:    mov x4, x23
-; CHECK-NEXT:    mov x5, x24
-; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    mov x0, x19
-; CHECK-NEXT:    mov x1, x20
-; CHECK-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x6, x9, xzr, le
-; CHECK-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x7, x8, xzr, le
-; CHECK-NEXT:    add sp, sp, #96
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v4f32_v4i128:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #96
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w23, -40
+; CHECK-SD-NEXT:    .cfi_offset w24, -48
+; CHECK-SD-NEXT:    .cfi_offset w30, -64
+; CHECK-SD-NEXT:    .cfi_offset b8, -72
+; CHECK-SD-NEXT:    .cfi_offset b9, -80
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w8, #2139095039 // =0x7f7fffff
+; CHECK-SD-NEXT:    fmov s9, w8
+; CHECK-SD-NEXT:    mov s8, v0.s[1]
+; CHECK-SD-NEXT:    fcmp s0, #0.0
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s0, s9
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    csinv x19, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x20, x8, xzr, le
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    csinv x21, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x22, x8, xzr, le
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov s8, v0.s[1]
+; CHECK-SD-NEXT:    fcmp s0, #0.0
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s0, s9
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    csinv x23, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x24, x8, xzr, le
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov x2, x21
+; CHECK-SD-NEXT:    mov x3, x22
+; CHECK-SD-NEXT:    mov x4, x23
+; CHECK-SD-NEXT:    mov x5, x24
+; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csinv x6, x9, xzr, le
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csinv x7, x8, xzr, le
+; CHECK-SD-NEXT:    add sp, sp, #96
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v4f32_v4i128:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #112
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w21, -24
+; CHECK-GI-NEXT:    .cfi_offset w22, -32
+; CHECK-GI-NEXT:    .cfi_offset w23, -40
+; CHECK-GI-NEXT:    .cfi_offset w24, -48
+; CHECK-GI-NEXT:    .cfi_offset w30, -64
+; CHECK-GI-NEXT:    .cfi_offset b8, -72
+; CHECK-GI-NEXT:    .cfi_offset b9, -80
+; CHECK-GI-NEXT:    .cfi_offset b10, -88
+; CHECK-GI-NEXT:    .cfi_offset b11, -96
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    mov s9, v0.s[2]
+; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT:    bl __fixunssfti
+; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w8, #2139095039 // =0x7f7fffff
+; CHECK-GI-NEXT:    fmov s11, w8
+; CHECK-GI-NEXT:    fcmp s0, #0.0
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp s0, s11
+; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    csinv x19, x8, xzr, le
+; CHECK-GI-NEXT:    csinv x20, x9, xzr, le
+; CHECK-GI-NEXT:    bl __fixunssfti
+; CHECK-GI-NEXT:    fcmp s8, #0.0
+; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp s8, s11
+; CHECK-GI-NEXT:    csinv x21, x8, xzr, le
+; CHECK-GI-NEXT:    csinv x22, x9, xzr, le
+; CHECK-GI-NEXT:    bl __fixunssfti
+; CHECK-GI-NEXT:    fcmp s9, #0.0
+; CHECK-GI-NEXT:    fmov s0, s10
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp s9, s11
+; CHECK-GI-NEXT:    csinv x23, x8, xzr, le
+; CHECK-GI-NEXT:    csinv x24, x9, xzr, le
+; CHECK-GI-NEXT:    bl __fixunssfti
+; CHECK-GI-NEXT:    fcmp s10, #0.0
+; CHECK-GI-NEXT:    mov x2, x21
+; CHECK-GI-NEXT:    mov x3, x22
+; CHECK-GI-NEXT:    mov x4, x23
+; CHECK-GI-NEXT:    mov x5, x24
+; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp s10, s11
+; CHECK-GI-NEXT:    mov x0, x19
+; CHECK-GI-NEXT:    mov x1, x20
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csinv x6, x8, xzr, le
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csinv x7, x9, xzr, le
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    add sp, sp, #112
+; CHECK-GI-NEXT:    ret
     %x = call <4 x i128> @llvm.fptoui.sat.v4f32.v4i128(<4 x float> %f)
     ret <4 x i128> %x
 }
@@ -1097,127 +2016,193 @@ declare <2 x i100> @llvm.fptoui.sat.v2f64.v2i100(<2 x double>)
 declare <2 x i128> @llvm.fptoui.sat.v2f64.v2i128(<2 x double>)
 
 define <2 x i1> @test_unsigned_v2f64_v2i1(<2 x double> %f) {
-; CHECK-LABEL: test_unsigned_v2f64_v2i1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    fcvtzu w9, d0
-; CHECK-NEXT:    fcvtzu w8, d1
-; CHECK-NEXT:    cmp w8, #1
-; CHECK-NEXT:    csinc w8, w8, wzr, lo
-; CHECK-NEXT:    cmp w9, #1
-; CHECK-NEXT:    csinc w9, w9, wzr, lo
-; CHECK-NEXT:    fmov s0, w9
-; CHECK-NEXT:    mov v0.s[1], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v2f64_v2i1:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d1, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzu w9, d0
+; CHECK-SD-NEXT:    fcvtzu w8, d1
+; CHECK-SD-NEXT:    cmp w8, #1
+; CHECK-SD-NEXT:    csinc w8, w8, wzr, lo
+; CHECK-SD-NEXT:    cmp w9, #1
+; CHECK-SD-NEXT:    csinc w9, w9, wzr, lo
+; CHECK-SD-NEXT:    fmov s0, w9
+; CHECK-SD-NEXT:    mov v0.s[1], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v2f64_v2i1:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    adrp x8, .LCPI46_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI46_0]
+; CHECK-GI-NEXT:    cmhi v2.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i1> @llvm.fptoui.sat.v2f64.v2i1(<2 x double> %f)
     ret <2 x i1> %x
 }
 
 define <2 x i8> @test_unsigned_v2f64_v2i8(<2 x double> %f) {
-; CHECK-LABEL: test_unsigned_v2f64_v2i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    fcvtzu w10, d0
-; CHECK-NEXT:    mov w8, #255 // =0xff
-; CHECK-NEXT:    fcvtzu w9, d1
-; CHECK-NEXT:    cmp w9, #255
-; CHECK-NEXT:    csel w9, w9, w8, lo
-; CHECK-NEXT:    cmp w10, #255
-; CHECK-NEXT:    csel w8, w10, w8, lo
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v2f64_v2i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d1, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzu w10, d0
+; CHECK-SD-NEXT:    mov w8, #255 // =0xff
+; CHECK-SD-NEXT:    fcvtzu w9, d1
+; CHECK-SD-NEXT:    cmp w9, #255
+; CHECK-SD-NEXT:    csel w9, w9, w8, lo
+; CHECK-SD-NEXT:    cmp w10, #255
+; CHECK-SD-NEXT:    csel w8, w10, w8, lo
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v2f64_v2i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2d, #0x000000000000ff
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    cmhi v2.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i8> @llvm.fptoui.sat.v2f64.v2i8(<2 x double> %f)
     ret <2 x i8> %x
 }
 
 define <2 x i13> @test_unsigned_v2f64_v2i13(<2 x double> %f) {
-; CHECK-LABEL: test_unsigned_v2f64_v2i13:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    fcvtzu w9, d0
-; CHECK-NEXT:    mov w10, #8191 // =0x1fff
-; CHECK-NEXT:    fcvtzu w8, d1
-; CHECK-NEXT:    cmp w8, w10
-; CHECK-NEXT:    csel w8, w8, w10, lo
-; CHECK-NEXT:    cmp w9, w10
-; CHECK-NEXT:    csel w9, w9, w10, lo
-; CHECK-NEXT:    fmov s0, w9
-; CHECK-NEXT:    mov v0.s[1], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v2f64_v2i13:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d1, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzu w9, d0
+; CHECK-SD-NEXT:    mov w10, #8191 // =0x1fff
+; CHECK-SD-NEXT:    fcvtzu w8, d1
+; CHECK-SD-NEXT:    cmp w8, w10
+; CHECK-SD-NEXT:    csel w8, w8, w10, lo
+; CHECK-SD-NEXT:    cmp w9, w10
+; CHECK-SD-NEXT:    csel w9, w9, w10, lo
+; CHECK-SD-NEXT:    fmov s0, w9
+; CHECK-SD-NEXT:    mov v0.s[1], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v2f64_v2i13:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    adrp x8, .LCPI48_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI48_0]
+; CHECK-GI-NEXT:    cmhi v2.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i13> @llvm.fptoui.sat.v2f64.v2i13(<2 x double> %f)
     ret <2 x i13> %x
 }
 
 define <2 x i16> @test_unsigned_v2f64_v2i16(<2 x double> %f) {
-; CHECK-LABEL: test_unsigned_v2f64_v2i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    fcvtzu w9, d0
-; CHECK-NEXT:    mov w10, #65535 // =0xffff
-; CHECK-NEXT:    fcvtzu w8, d1
-; CHECK-NEXT:    cmp w8, w10
-; CHECK-NEXT:    csel w8, w8, w10, lo
-; CHECK-NEXT:    cmp w9, w10
-; CHECK-NEXT:    csel w9, w9, w10, lo
-; CHECK-NEXT:    fmov s0, w9
-; CHECK-NEXT:    mov v0.s[1], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v2f64_v2i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d1, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzu w9, d0
+; CHECK-SD-NEXT:    mov w10, #65535 // =0xffff
+; CHECK-SD-NEXT:    fcvtzu w8, d1
+; CHECK-SD-NEXT:    cmp w8, w10
+; CHECK-SD-NEXT:    csel w8, w8, w10, lo
+; CHECK-SD-NEXT:    cmp w9, w10
+; CHECK-SD-NEXT:    csel w9, w9, w10, lo
+; CHECK-SD-NEXT:    fmov s0, w9
+; CHECK-SD-NEXT:    mov v0.s[1], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v2f64_v2i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2d, #0x0000000000ffff
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    cmhi v2.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i16> @llvm.fptoui.sat.v2f64.v2i16(<2 x double> %f)
     ret <2 x i16> %x
 }
 
 define <2 x i19> @test_unsigned_v2f64_v2i19(<2 x double> %f) {
-; CHECK-LABEL: test_unsigned_v2f64_v2i19:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    fcvtzu w9, d0
-; CHECK-NEXT:    mov w10, #524287 // =0x7ffff
-; CHECK-NEXT:    fcvtzu w8, d1
-; CHECK-NEXT:    cmp w8, w10
-; CHECK-NEXT:    csel w8, w8, w10, lo
-; CHECK-NEXT:    cmp w9, w10
-; CHECK-NEXT:    csel w9, w9, w10, lo
-; CHECK-NEXT:    fmov s0, w9
-; CHECK-NEXT:    mov v0.s[1], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v2f64_v2i19:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d1, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzu w9, d0
+; CHECK-SD-NEXT:    mov w10, #524287 // =0x7ffff
+; CHECK-SD-NEXT:    fcvtzu w8, d1
+; CHECK-SD-NEXT:    cmp w8, w10
+; CHECK-SD-NEXT:    csel w8, w8, w10, lo
+; CHECK-SD-NEXT:    cmp w9, w10
+; CHECK-SD-NEXT:    csel w9, w9, w10, lo
+; CHECK-SD-NEXT:    fmov s0, w9
+; CHECK-SD-NEXT:    mov v0.s[1], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v2f64_v2i19:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    adrp x8, .LCPI50_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI50_0]
+; CHECK-GI-NEXT:    cmhi v2.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i19> @llvm.fptoui.sat.v2f64.v2i19(<2 x double> %f)
     ret <2 x i19> %x
 }
 
 define <2 x i32> @test_unsigned_v2f64_v2i32_duplicate(<2 x double> %f) {
-; CHECK-LABEL: test_unsigned_v2f64_v2i32_duplicate:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    fcvtzu w8, d0
-; CHECK-NEXT:    fcvtzu w9, d1
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v2f64_v2i32_duplicate:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d1, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzu w8, d0
+; CHECK-SD-NEXT:    fcvtzu w9, d1
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v2f64_v2i32_duplicate:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2d, #0x000000ffffffff
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    cmhi v2.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i32> @llvm.fptoui.sat.v2f64.v2i32(<2 x double> %f)
     ret <2 x i32> %x
 }
 
 define <2 x i50> @test_unsigned_v2f64_v2i50(<2 x double> %f) {
-; CHECK-LABEL: test_unsigned_v2f64_v2i50:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    fcvtzu x9, d0
-; CHECK-NEXT:    mov x10, #1125899906842623 // =0x3ffffffffffff
-; CHECK-NEXT:    fcvtzu x8, d1
-; CHECK-NEXT:    cmp x8, x10
-; CHECK-NEXT:    csel x8, x8, x10, lo
-; CHECK-NEXT:    cmp x9, x10
-; CHECK-NEXT:    csel x9, x9, x10, lo
-; CHECK-NEXT:    fmov d0, x9
-; CHECK-NEXT:    mov v0.d[1], x8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v2f64_v2i50:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d1, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzu x9, d0
+; CHECK-SD-NEXT:    mov x10, #1125899906842623 // =0x3ffffffffffff
+; CHECK-SD-NEXT:    fcvtzu x8, d1
+; CHECK-SD-NEXT:    cmp x8, x10
+; CHECK-SD-NEXT:    csel x8, x8, x10, lo
+; CHECK-SD-NEXT:    cmp x9, x10
+; CHECK-SD-NEXT:    csel x9, x9, x10, lo
+; CHECK-SD-NEXT:    fmov d0, x9
+; CHECK-SD-NEXT:    mov v0.d[1], x8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v2f64_v2i50:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    adrp x8, .LCPI52_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI52_0]
+; CHECK-GI-NEXT:    cmhi v2.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i50> @llvm.fptoui.sat.v2f64.v2i50(<2 x double> %f)
     ret <2 x i50> %x
 }
@@ -1232,93 +2217,177 @@ define <2 x i64> @test_unsigned_v2f64_v2i64(<2 x double> %f) {
 }
 
 define <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) {
-; CHECK-LABEL: test_unsigned_v2f64_v2i100:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x30, x21, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w30, -32
-; CHECK-NEXT:    .cfi_offset b8, -40
-; CHECK-NEXT:    .cfi_offset b9, -48
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov x8, #5057542381537067007 // =0x462fffffffffffff
-; CHECK-NEXT:    mov x21, #68719476735 // =0xfffffffff
-; CHECK-NEXT:    fmov d9, x8
-; CHECK-NEXT:    mov d8, v0.d[1]
-; CHECK-NEXT:    fcmp d0, #0.0
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp d0, d9
-; CHECK-NEXT:    fmov d0, d8
-; CHECK-NEXT:    csel x19, x21, x9, gt
-; CHECK-NEXT:    csinv x20, x8, xzr, le
-; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    fcmp d8, #0.0
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp d8, d9
-; CHECK-NEXT:    mov x0, x20
-; CHECK-NEXT:    mov x1, x19
-; CHECK-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x3, x21, x9, gt
-; CHECK-NEXT:    ldp x30, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x2, x8, xzr, le
-; CHECK-NEXT:    add sp, sp, #64
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v2f64_v2i100:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #64
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x30, x21, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w30, -32
+; CHECK-SD-NEXT:    .cfi_offset b8, -40
+; CHECK-SD-NEXT:    .cfi_offset b9, -48
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    bl __fixunsdfti
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov x8, #5057542381537067007 // =0x462fffffffffffff
+; CHECK-SD-NEXT:    mov x21, #68719476735 // =0xfffffffff
+; CHECK-SD-NEXT:    fmov d9, x8
+; CHECK-SD-NEXT:    mov d8, v0.d[1]
+; CHECK-SD-NEXT:    fcmp d0, #0.0
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp d0, d9
+; CHECK-SD-NEXT:    fmov d0, d8
+; CHECK-SD-NEXT:    csel x19, x21, x9, gt
+; CHECK-SD-NEXT:    csinv x20, x8, xzr, le
+; CHECK-SD-NEXT:    bl __fixunsdfti
+; CHECK-SD-NEXT:    fcmp d8, #0.0
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp d8, d9
+; CHECK-SD-NEXT:    mov x0, x20
+; CHECK-SD-NEXT:    mov x1, x19
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x3, x21, x9, gt
+; CHECK-SD-NEXT:    ldp x30, x21, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csinv x2, x8, xzr, le
+; CHECK-SD-NEXT:    add sp, sp, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v2f64_v2i100:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #64
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x30, x21, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w21, -24
+; CHECK-GI-NEXT:    .cfi_offset w30, -32
+; CHECK-GI-NEXT:    .cfi_offset b8, -40
+; CHECK-GI-NEXT:    .cfi_offset b9, -48
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    bl __fixunsdfti
+; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov x8, #5057542381537067007 // =0x462fffffffffffff
+; CHECK-GI-NEXT:    mov x21, #68719476735 // =0xfffffffff
+; CHECK-GI-NEXT:    fmov d9, x8
+; CHECK-GI-NEXT:    fcmp d0, #0.0
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp d0, d9
+; CHECK-GI-NEXT:    fmov d0, d8
+; CHECK-GI-NEXT:    csinv x19, x8, xzr, le
+; CHECK-GI-NEXT:    csel x20, x21, x9, gt
+; CHECK-GI-NEXT:    bl __fixunsdfti
+; CHECK-GI-NEXT:    fcmp d8, #0.0
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp d8, d9
+; CHECK-GI-NEXT:    mov x0, x19
+; CHECK-GI-NEXT:    mov x1, x20
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csel x3, x21, x9, gt
+; CHECK-GI-NEXT:    ldp x30, x21, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csinv x2, x8, xzr, le
+; CHECK-GI-NEXT:    add sp, sp, #64
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i100> @llvm.fptoui.sat.v2f64.v2i100(<2 x double> %f)
     ret <2 x i100> %x
 }
 
 define <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) {
-; CHECK-LABEL: test_unsigned_v2f64_v2i128:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w30, -32
-; CHECK-NEXT:    .cfi_offset b8, -40
-; CHECK-NEXT:    .cfi_offset b9, -48
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov x8, #5183643171103440895 // =0x47efffffffffffff
-; CHECK-NEXT:    fmov d9, x8
-; CHECK-NEXT:    mov d8, v0.d[1]
-; CHECK-NEXT:    fcmp d0, #0.0
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp d0, d9
-; CHECK-NEXT:    fmov d0, d8
-; CHECK-NEXT:    csinv x19, x9, xzr, le
-; CHECK-NEXT:    csinv x20, x8, xzr, le
-; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    fcmp d8, #0.0
-; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp d8, d9
-; CHECK-NEXT:    mov x0, x19
-; CHECK-NEXT:    mov x1, x20
-; CHECK-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x2, x9, xzr, le
-; CHECK-NEXT:    csinv x3, x8, xzr, le
-; CHECK-NEXT:    add sp, sp, #64
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v2f64_v2i128:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #64
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w30, -32
+; CHECK-SD-NEXT:    .cfi_offset b8, -40
+; CHECK-SD-NEXT:    .cfi_offset b9, -48
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    bl __fixunsdfti
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov x8, #5183643171103440895 // =0x47efffffffffffff
+; CHECK-SD-NEXT:    fmov d9, x8
+; CHECK-SD-NEXT:    mov d8, v0.d[1]
+; CHECK-SD-NEXT:    fcmp d0, #0.0
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp d0, d9
+; CHECK-SD-NEXT:    fmov d0, d8
+; CHECK-SD-NEXT:    csinv x19, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x20, x8, xzr, le
+; CHECK-SD-NEXT:    bl __fixunsdfti
+; CHECK-SD-NEXT:    fcmp d8, #0.0
+; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp d8, d9
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csinv x2, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x3, x8, xzr, le
+; CHECK-SD-NEXT:    add sp, sp, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v2f64_v2i128:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #64
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w30, -32
+; CHECK-GI-NEXT:    .cfi_offset b8, -40
+; CHECK-GI-NEXT:    .cfi_offset b9, -48
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    bl __fixunsdfti
+; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov x8, #5183643171103440895 // =0x47efffffffffffff
+; CHECK-GI-NEXT:    fmov d9, x8
+; CHECK-GI-NEXT:    fcmp d0, #0.0
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp d0, d9
+; CHECK-GI-NEXT:    fmov d0, d8
+; CHECK-GI-NEXT:    csinv x19, x8, xzr, le
+; CHECK-GI-NEXT:    csinv x20, x9, xzr, le
+; CHECK-GI-NEXT:    bl __fixunsdfti
+; CHECK-GI-NEXT:    fcmp d8, #0.0
+; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp d8, d9
+; CHECK-GI-NEXT:    mov x0, x19
+; CHECK-GI-NEXT:    mov x1, x20
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csinv x2, x8, xzr, le
+; CHECK-GI-NEXT:    csinv x3, x9, xzr, le
+; CHECK-GI-NEXT:    add sp, sp, #64
+; CHECK-GI-NEXT:    ret
     %x = call <2 x i128> @llvm.fptoui.sat.v2f64.v2i128(<2 x double> %f)
     ret <2 x i128> %x
 }
@@ -1338,77 +2407,139 @@ declare <4 x i100> @llvm.fptoui.sat.v4f16.v4i100(<4 x half>)
 declare <4 x i128> @llvm.fptoui.sat.v4f16.v4i128(<4 x half>)
 
 define <4 x i1> @test_unsigned_v4f16_v4i1(<4 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i1:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    movi v1.4s, #1
-; CHECK-CVT-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT:    xtn v0.4h, v0.4s
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i1:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    movi v1.4h, #1
-; CHECK-FP16-NEXT:    fcvtzu v0.4h, v0.4h
-; CHECK-FP16-NEXT:    umin v0.4h, v0.4h, v1.4h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_v4f16_v4i1:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    movi v1.4s, #1
+; CHECK-SD-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_v4f16_v4i1:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    movi v1.4h, #1
+; CHECK-SD-FP16-NEXT:    fcvtzu v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT:    umin v0.4h, v0.4h, v1.4h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v4f16_v4i1:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    movi v1.4s, #1
+; CHECK-GI-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i1:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    movi v1.4h, #1
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT:    umin v0.4h, v0.4h, v1.4h
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <4 x i1> @llvm.fptoui.sat.v4f16.v4i1(<4 x half> %f)
     ret <4 x i1> %x
 }
 
 define <4 x i8> @test_unsigned_v4f16_v4i8(<4 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i8:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    movi v1.2d, #0x0000ff000000ff
-; CHECK-CVT-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT:    xtn v0.4h, v0.4s
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i8:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    movi d1, #0xff00ff00ff00ff
-; CHECK-FP16-NEXT:    fcvtzu v0.4h, v0.4h
-; CHECK-FP16-NEXT:    umin v0.4h, v0.4h, v1.4h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_v4f16_v4i8:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    movi v1.2d, #0x0000ff000000ff
+; CHECK-SD-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_v4f16_v4i8:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    movi d1, #0xff00ff00ff00ff
+; CHECK-SD-FP16-NEXT:    fcvtzu v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT:    umin v0.4h, v0.4h, v1.4h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v4f16_v4i8:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    movi v1.2d, #0x0000ff000000ff
+; CHECK-GI-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i8:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    movi d1, #0xff00ff00ff00ff
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT:    umin v0.4h, v0.4h, v1.4h
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <4 x i8> @llvm.fptoui.sat.v4f16.v4i8(<4 x half> %f)
     ret <4 x i8> %x
 }
 
 define <4 x i13> @test_unsigned_v4f16_v4i13(<4 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i13:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    movi v1.4s, #31, msl #8
-; CHECK-CVT-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT:    xtn v0.4h, v0.4s
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i13:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu v0.4h, v0.4h
-; CHECK-FP16-NEXT:    mvni v1.4h, #224, lsl #8
-; CHECK-FP16-NEXT:    umin v0.4h, v0.4h, v1.4h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_v4f16_v4i13:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    movi v1.4s, #31, msl #8
+; CHECK-SD-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_v4f16_v4i13:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT:    mvni v1.4h, #224, lsl #8
+; CHECK-SD-FP16-NEXT:    umin v0.4h, v0.4h, v1.4h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v4f16_v4i13:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    movi v1.4s, #31, msl #8
+; CHECK-GI-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i13:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT:    mvni v1.4h, #224, lsl #8
+; CHECK-GI-FP16-NEXT:    umin v0.4h, v0.4h, v1.4h
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <4 x i13> @llvm.fptoui.sat.v4f16.v4i13(<4 x half> %f)
     ret <4 x i13> %x
 }
 
 define <4 x i16> @test_unsigned_v4f16_v4i16(<4 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT:    uqxtn v0.4h, v0.4s
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu v0.4h, v0.4h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_v4f16_v4i16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-CVT-NEXT:    uqxtn v0.4h, v0.4s
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_v4f16_v4i16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v4f16_v4i16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    movi v1.2d, #0x00ffff0000ffff
+; CHECK-GI-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <4 x i16> @llvm.fptoui.sat.v4f16.v4i16(<4 x half> %f)
     ret <4 x i16> %x
 }
@@ -1436,257 +2567,400 @@ define <4 x i32> @test_unsigned_v4f16_v4i32_duplicate(<4 x half> %f) {
 }
 
 define <4 x i50> @test_unsigned_v4f16_v4i50(<4 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i50:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-CVT-NEXT:    mov h1, v0.h[1]
-; CHECK-CVT-NEXT:    mov h2, v0.h[2]
-; CHECK-CVT-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
-; CHECK-CVT-NEXT:    mov h3, v0.h[3]
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvt s1, h1
-; CHECK-CVT-NEXT:    fcvt s2, h2
-; CHECK-CVT-NEXT:    fcvt s3, h3
-; CHECK-CVT-NEXT:    fcvtzu x9, s0
-; CHECK-CVT-NEXT:    fcvtzu x10, s1
-; CHECK-CVT-NEXT:    fcvtzu x11, s2
-; CHECK-CVT-NEXT:    fcvtzu x12, s3
-; CHECK-CVT-NEXT:    cmp x9, x8
-; CHECK-CVT-NEXT:    csel x0, x9, x8, lo
-; CHECK-CVT-NEXT:    cmp x10, x8
-; CHECK-CVT-NEXT:    csel x1, x10, x8, lo
-; CHECK-CVT-NEXT:    cmp x11, x8
-; CHECK-CVT-NEXT:    csel x2, x11, x8, lo
-; CHECK-CVT-NEXT:    cmp x12, x8
-; CHECK-CVT-NEXT:    csel x3, x12, x8, lo
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i50:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT:    mov h1, v0.h[1]
-; CHECK-FP16-NEXT:    mov h2, v0.h[2]
-; CHECK-FP16-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
-; CHECK-FP16-NEXT:    mov h3, v0.h[3]
-; CHECK-FP16-NEXT:    fcvtzu x9, h0
-; CHECK-FP16-NEXT:    fcvtzu x10, h1
-; CHECK-FP16-NEXT:    fcvtzu x11, h2
-; CHECK-FP16-NEXT:    fcvtzu x12, h3
-; CHECK-FP16-NEXT:    cmp x9, x8
-; CHECK-FP16-NEXT:    csel x0, x9, x8, lo
-; CHECK-FP16-NEXT:    cmp x10, x8
-; CHECK-FP16-NEXT:    csel x1, x10, x8, lo
-; CHECK-FP16-NEXT:    cmp x11, x8
-; CHECK-FP16-NEXT:    csel x2, x11, x8, lo
-; CHECK-FP16-NEXT:    cmp x12, x8
-; CHECK-FP16-NEXT:    csel x3, x12, x8, lo
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_v4f16_v4i50:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-CVT-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-CVT-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-CVT-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
+; CHECK-SD-CVT-NEXT:    mov h3, v0.h[3]
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvt s1, h1
+; CHECK-SD-CVT-NEXT:    fcvt s2, h2
+; CHECK-SD-CVT-NEXT:    fcvt s3, h3
+; CHECK-SD-CVT-NEXT:    fcvtzu x9, s0
+; CHECK-SD-CVT-NEXT:    fcvtzu x10, s1
+; CHECK-SD-CVT-NEXT:    fcvtzu x11, s2
+; CHECK-SD-CVT-NEXT:    fcvtzu x12, s3
+; CHECK-SD-CVT-NEXT:    cmp x9, x8
+; CHECK-SD-CVT-NEXT:    csel x0, x9, x8, lo
+; CHECK-SD-CVT-NEXT:    cmp x10, x8
+; CHECK-SD-CVT-NEXT:    csel x1, x10, x8, lo
+; CHECK-SD-CVT-NEXT:    cmp x11, x8
+; CHECK-SD-CVT-NEXT:    csel x2, x11, x8, lo
+; CHECK-SD-CVT-NEXT:    cmp x12, x8
+; CHECK-SD-CVT-NEXT:    csel x3, x12, x8, lo
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_v4f16_v4i50:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-FP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-FP16-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-FP16-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
+; CHECK-SD-FP16-NEXT:    mov h3, v0.h[3]
+; CHECK-SD-FP16-NEXT:    fcvtzu x9, h0
+; CHECK-SD-FP16-NEXT:    fcvtzu x10, h1
+; CHECK-SD-FP16-NEXT:    fcvtzu x11, h2
+; CHECK-SD-FP16-NEXT:    fcvtzu x12, h3
+; CHECK-SD-FP16-NEXT:    cmp x9, x8
+; CHECK-SD-FP16-NEXT:    csel x0, x9, x8, lo
+; CHECK-SD-FP16-NEXT:    cmp x10, x8
+; CHECK-SD-FP16-NEXT:    csel x1, x10, x8, lo
+; CHECK-SD-FP16-NEXT:    cmp x11, x8
+; CHECK-SD-FP16-NEXT:    csel x2, x11, x8, lo
+; CHECK-SD-FP16-NEXT:    cmp x12, x8
+; CHECK-SD-FP16-NEXT:    csel x3, x12, x8, lo
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v4f16_v4i50:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-CVT-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
+; CHECK-GI-CVT-NEXT:    mov h3, v0.h[3]
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvt s1, h1
+; CHECK-GI-CVT-NEXT:    fcvt s2, h2
+; CHECK-GI-CVT-NEXT:    fcvt s3, h3
+; CHECK-GI-CVT-NEXT:    fcvtzu x9, s0
+; CHECK-GI-CVT-NEXT:    fcvtzu x10, s1
+; CHECK-GI-CVT-NEXT:    fcvtzu x11, s2
+; CHECK-GI-CVT-NEXT:    fcvtzu x12, s3
+; CHECK-GI-CVT-NEXT:    cmp x9, x8
+; CHECK-GI-CVT-NEXT:    csel x0, x9, x8, lo
+; CHECK-GI-CVT-NEXT:    cmp x10, x8
+; CHECK-GI-CVT-NEXT:    csel x1, x10, x8, lo
+; CHECK-GI-CVT-NEXT:    cmp x11, x8
+; CHECK-GI-CVT-NEXT:    csel x2, x11, x8, lo
+; CHECK-GI-CVT-NEXT:    cmp x12, x8
+; CHECK-GI-CVT-NEXT:    csel x3, x12, x8, lo
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i50:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
+; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT:    fcvtzu x9, h0
+; CHECK-GI-FP16-NEXT:    fcvtzu x10, h1
+; CHECK-GI-FP16-NEXT:    fcvtzu x11, h2
+; CHECK-GI-FP16-NEXT:    fcvtzu x12, h3
+; CHECK-GI-FP16-NEXT:    cmp x9, x8
+; CHECK-GI-FP16-NEXT:    csel x0, x9, x8, lo
+; CHECK-GI-FP16-NEXT:    cmp x10, x8
+; CHECK-GI-FP16-NEXT:    csel x1, x10, x8, lo
+; CHECK-GI-FP16-NEXT:    cmp x11, x8
+; CHECK-GI-FP16-NEXT:    csel x2, x11, x8, lo
+; CHECK-GI-FP16-NEXT:    cmp x12, x8
+; CHECK-GI-FP16-NEXT:    csel x3, x12, x8, lo
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <4 x i50> @llvm.fptoui.sat.v4f16.v4i50(<4 x half> %f)
     ret <4 x i50> %x
 }
 
 define <4 x i64> @test_unsigned_v4f16_v4i64(<4 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i64:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-CVT-NEXT:    mov h1, v0.h[2]
-; CHECK-CVT-NEXT:    mov h2, v0.h[1]
-; CHECK-CVT-NEXT:    mov h3, v0.h[3]
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvt s1, h1
-; CHECK-CVT-NEXT:    fcvt s2, h2
-; CHECK-CVT-NEXT:    fcvt s3, h3
-; CHECK-CVT-NEXT:    fcvtzu x8, s0
-; CHECK-CVT-NEXT:    fcvtzu x9, s1
-; CHECK-CVT-NEXT:    fcvtzu x10, s2
-; CHECK-CVT-NEXT:    fcvtzu x11, s3
-; CHECK-CVT-NEXT:    fmov d0, x8
-; CHECK-CVT-NEXT:    fmov d1, x9
-; CHECK-CVT-NEXT:    mov v0.d[1], x10
-; CHECK-CVT-NEXT:    mov v1.d[1], x11
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i64:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT:    mov h1, v0.h[2]
-; CHECK-FP16-NEXT:    mov h2, v0.h[1]
-; CHECK-FP16-NEXT:    mov h3, v0.h[3]
-; CHECK-FP16-NEXT:    fcvtzu x8, h0
-; CHECK-FP16-NEXT:    fcvtzu x9, h1
-; CHECK-FP16-NEXT:    fcvtzu x10, h2
-; CHECK-FP16-NEXT:    fcvtzu x11, h3
-; CHECK-FP16-NEXT:    fmov d0, x8
-; CHECK-FP16-NEXT:    fmov d1, x9
-; CHECK-FP16-NEXT:    mov v0.d[1], x10
-; CHECK-FP16-NEXT:    mov v1.d[1], x11
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_v4f16_v4i64:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-CVT-NEXT:    mov h1, v0.h[2]
+; CHECK-SD-CVT-NEXT:    mov h2, v0.h[1]
+; CHECK-SD-CVT-NEXT:    mov h3, v0.h[3]
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvt s1, h1
+; CHECK-SD-CVT-NEXT:    fcvt s2, h2
+; CHECK-SD-CVT-NEXT:    fcvt s3, h3
+; CHECK-SD-CVT-NEXT:    fcvtzu x8, s0
+; CHECK-SD-CVT-NEXT:    fcvtzu x9, s1
+; CHECK-SD-CVT-NEXT:    fcvtzu x10, s2
+; CHECK-SD-CVT-NEXT:    fcvtzu x11, s3
+; CHECK-SD-CVT-NEXT:    fmov d0, x8
+; CHECK-SD-CVT-NEXT:    fmov d1, x9
+; CHECK-SD-CVT-NEXT:    mov v0.d[1], x10
+; CHECK-SD-CVT-NEXT:    mov v1.d[1], x11
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_v4f16_v4i64:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-FP16-NEXT:    mov h1, v0.h[2]
+; CHECK-SD-FP16-NEXT:    mov h2, v0.h[1]
+; CHECK-SD-FP16-NEXT:    mov h3, v0.h[3]
+; CHECK-SD-FP16-NEXT:    fcvtzu x8, h0
+; CHECK-SD-FP16-NEXT:    fcvtzu x9, h1
+; CHECK-SD-FP16-NEXT:    fcvtzu x10, h2
+; CHECK-SD-FP16-NEXT:    fcvtzu x11, h3
+; CHECK-SD-FP16-NEXT:    fmov d0, x8
+; CHECK-SD-FP16-NEXT:    fmov d1, x9
+; CHECK-SD-FP16-NEXT:    mov v0.d[1], x10
+; CHECK-SD-FP16-NEXT:    mov v1.d[1], x11
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v4f16_v4i64:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    fcvtl v1.2d, v0.2s
+; CHECK-GI-CVT-NEXT:    fcvtl2 v2.2d, v0.4s
+; CHECK-GI-CVT-NEXT:    fcvtzu v0.2d, v1.2d
+; CHECK-GI-CVT-NEXT:    fcvtzu v1.2d, v2.2d
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i64:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-FP16-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[1]
+; CHECK-GI-FP16-NEXT:    fcvt d0, h0
+; CHECK-GI-FP16-NEXT:    mov h3, v1.h[1]
+; CHECK-GI-FP16-NEXT:    fcvt d2, h2
+; CHECK-GI-FP16-NEXT:    fcvt d1, h1
+; CHECK-GI-FP16-NEXT:    fcvt d3, h3
+; CHECK-GI-FP16-NEXT:    mov v0.d[1], v2.d[0]
+; CHECK-GI-FP16-NEXT:    mov v1.d[1], v3.d[0]
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <4 x i64> @llvm.fptoui.sat.v4f16.v4i64(<4 x half> %f)
     ret <4 x i64> %x
 }
 
 define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
-; CHECK-LABEL: test_unsigned_v4f16_v4i100:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #96
-; CHECK-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x30, x25, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w22, -32
-; CHECK-NEXT:    .cfi_offset w23, -40
-; CHECK-NEXT:    .cfi_offset w24, -48
-; CHECK-NEXT:    .cfi_offset w25, -56
-; CHECK-NEXT:    .cfi_offset w30, -64
-; CHECK-NEXT:    .cfi_offset b8, -72
-; CHECK-NEXT:    .cfi_offset b9, -80
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov h1, v0.h[1]
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    fcvt s8, h1
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w8, #1904214015 // =0x717fffff
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    fmov s9, w8
-; CHECK-NEXT:    mov x25, #68719476735 // =0xfffffffff
-; CHECK-NEXT:    mov h0, v0.h[2]
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x19, x25, x8, gt
-; CHECK-NEXT:    csinv x20, x9, xzr, le
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x21, x25, x9, gt
-; CHECK-NEXT:    csinv x22, x8, xzr, le
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov h0, v0.h[3]
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x23, x25, x9, gt
-; CHECK-NEXT:    csinv x24, x8, xzr, le
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov x2, x20
-; CHECK-NEXT:    mov x3, x19
-; CHECK-NEXT:    mov x4, x22
-; CHECK-NEXT:    mov x5, x21
-; CHECK-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    mov x0, x24
-; CHECK-NEXT:    mov x1, x23
-; CHECK-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x7, x25, x9, gt
-; CHECK-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x30, x25, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x6, x8, xzr, le
-; CHECK-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    add sp, sp, #96
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v4f16_v4i100:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #96
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x30, x25, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w23, -40
+; CHECK-SD-NEXT:    .cfi_offset w24, -48
+; CHECK-SD-NEXT:    .cfi_offset w25, -56
+; CHECK-SD-NEXT:    .cfi_offset w30, -64
+; CHECK-SD-NEXT:    .cfi_offset b8, -72
+; CHECK-SD-NEXT:    .cfi_offset b9, -80
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    fcvt s8, h1
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w8, #1904214015 // =0x717fffff
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    fmov s9, w8
+; CHECK-SD-NEXT:    mov x25, #68719476735 // =0xfffffffff
+; CHECK-SD-NEXT:    mov h0, v0.h[2]
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x19, x25, x8, gt
+; CHECK-SD-NEXT:    csinv x20, x9, xzr, le
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x21, x25, x9, gt
+; CHECK-SD-NEXT:    csinv x22, x8, xzr, le
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x23, x25, x9, gt
+; CHECK-SD-NEXT:    csinv x24, x8, xzr, le
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov x2, x20
+; CHECK-SD-NEXT:    mov x3, x19
+; CHECK-SD-NEXT:    mov x4, x22
+; CHECK-SD-NEXT:    mov x5, x21
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    mov x0, x24
+; CHECK-SD-NEXT:    mov x1, x23
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x7, x25, x9, gt
+; CHECK-SD-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x30, x25, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csinv x6, x8, xzr, le
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    add sp, sp, #96
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v4f16_v4i100:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-CVT-NEXT:    mov x1, xzr
+; CHECK-GI-CVT-NEXT:    mov h3, v0.h[3]
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov x3, xzr
+; CHECK-GI-CVT-NEXT:    mov x5, xzr
+; CHECK-GI-CVT-NEXT:    mov x7, xzr
+; CHECK-GI-CVT-NEXT:    fcvt s1, h1
+; CHECK-GI-CVT-NEXT:    fcvt s2, h2
+; CHECK-GI-CVT-NEXT:    fcvt s3, h3
+; CHECK-GI-CVT-NEXT:    fcvtzu x0, s0
+; CHECK-GI-CVT-NEXT:    fcvtzu x2, s1
+; CHECK-GI-CVT-NEXT:    fcvtzu x4, s2
+; CHECK-GI-CVT-NEXT:    fcvtzu x6, s3
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i100:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT:    mov x1, xzr
+; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT:    fcvtzu x0, h0
+; CHECK-GI-FP16-NEXT:    mov x3, xzr
+; CHECK-GI-FP16-NEXT:    mov x5, xzr
+; CHECK-GI-FP16-NEXT:    mov x7, xzr
+; CHECK-GI-FP16-NEXT:    fcvtzu x2, h1
+; CHECK-GI-FP16-NEXT:    fcvtzu x4, h2
+; CHECK-GI-FP16-NEXT:    fcvtzu x6, h3
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <4 x i100> @llvm.fptoui.sat.v4f16.v4i100(<4 x half> %f)
     ret <4 x i100> %x
 }
 
 define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
-; CHECK-LABEL: test_unsigned_v4f16_v4i128:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #96
-; CHECK-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w22, -32
-; CHECK-NEXT:    .cfi_offset w23, -40
-; CHECK-NEXT:    .cfi_offset w24, -48
-; CHECK-NEXT:    .cfi_offset w30, -64
-; CHECK-NEXT:    .cfi_offset b8, -72
-; CHECK-NEXT:    .cfi_offset b9, -80
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w8, #2139095039 // =0x7f7fffff
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    fmov s9, w8
-; CHECK-NEXT:    mov h0, v0.h[1]
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csinv x19, x8, xzr, le
-; CHECK-NEXT:    csinv x20, x9, xzr, le
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov h0, v0.h[2]
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csinv x21, x9, xzr, le
-; CHECK-NEXT:    csinv x22, x8, xzr, le
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov h0, v0.h[3]
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csinv x23, x9, xzr, le
-; CHECK-NEXT:    csinv x24, x8, xzr, le
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov x2, x21
-; CHECK-NEXT:    mov x3, x22
-; CHECK-NEXT:    mov x4, x23
-; CHECK-NEXT:    mov x5, x24
-; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    mov x0, x19
-; CHECK-NEXT:    mov x1, x20
-; CHECK-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x6, x9, xzr, le
-; CHECK-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x7, x8, xzr, le
-; CHECK-NEXT:    add sp, sp, #96
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v4f16_v4i128:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #96
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w23, -40
+; CHECK-SD-NEXT:    .cfi_offset w24, -48
+; CHECK-SD-NEXT:    .cfi_offset w30, -64
+; CHECK-SD-NEXT:    .cfi_offset b8, -72
+; CHECK-SD-NEXT:    .cfi_offset b9, -80
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w8, #2139095039 // =0x7f7fffff
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    fmov s9, w8
+; CHECK-SD-NEXT:    mov h0, v0.h[1]
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csinv x19, x8, xzr, le
+; CHECK-SD-NEXT:    csinv x20, x9, xzr, le
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov h0, v0.h[2]
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csinv x21, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x22, x8, xzr, le
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csinv x23, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x24, x8, xzr, le
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov x2, x21
+; CHECK-SD-NEXT:    mov x3, x22
+; CHECK-SD-NEXT:    mov x4, x23
+; CHECK-SD-NEXT:    mov x5, x24
+; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csinv x6, x9, xzr, le
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csinv x7, x8, xzr, le
+; CHECK-SD-NEXT:    add sp, sp, #96
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v4f16_v4i128:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-CVT-NEXT:    mov x1, xzr
+; CHECK-GI-CVT-NEXT:    mov h3, v0.h[3]
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov x3, xzr
+; CHECK-GI-CVT-NEXT:    mov x5, xzr
+; CHECK-GI-CVT-NEXT:    mov x7, xzr
+; CHECK-GI-CVT-NEXT:    fcvt s1, h1
+; CHECK-GI-CVT-NEXT:    fcvt s2, h2
+; CHECK-GI-CVT-NEXT:    fcvt s3, h3
+; CHECK-GI-CVT-NEXT:    fcvtzu x0, s0
+; CHECK-GI-CVT-NEXT:    fcvtzu x2, s1
+; CHECK-GI-CVT-NEXT:    fcvtzu x4, s2
+; CHECK-GI-CVT-NEXT:    fcvtzu x6, s3
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i128:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT:    mov x1, xzr
+; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT:    fcvtzu x0, h0
+; CHECK-GI-FP16-NEXT:    mov x3, xzr
+; CHECK-GI-FP16-NEXT:    mov x5, xzr
+; CHECK-GI-FP16-NEXT:    mov x7, xzr
+; CHECK-GI-FP16-NEXT:    fcvtzu x2, h1
+; CHECK-GI-FP16-NEXT:    fcvtzu x4, h2
+; CHECK-GI-FP16-NEXT:    fcvtzu x6, h3
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <4 x i128> @llvm.fptoui.sat.v4f16.v4i128(<4 x half> %f)
     ret <4 x i128> %x
 }
@@ -1706,91 +2980,169 @@ declare <8 x i100> @llvm.fptoui.sat.v8f16.v8i100(<8 x half>)
 declare <8 x i128> @llvm.fptoui.sat.v8f16.v8i128(<8 x half>)
 
 define <8 x i1> @test_unsigned_v8f16_v8i1(<8 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i1:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    movi v1.4s, #1
-; CHECK-CVT-NEXT:    fcvtzu v2.4s, v2.4s
-; CHECK-CVT-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT:    umin v2.4s, v2.4s, v1.4s
-; CHECK-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-CVT-NEXT:    xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i1:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    movi v1.8h, #1
-; CHECK-FP16-NEXT:    fcvtzu v0.8h, v0.8h
-; CHECK-FP16-NEXT:    umin v0.8h, v0.8h, v1.8h
-; CHECK-FP16-NEXT:    xtn v0.8b, v0.8h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_v8f16_v8i1:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
+; CHECK-SD-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    movi v1.4s, #1
+; CHECK-SD-CVT-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-CVT-NEXT:    umin v2.4s, v2.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-SD-CVT-NEXT:    xtn v0.8b, v0.8h
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_v8f16_v8i1:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    movi v1.8h, #1
+; CHECK-SD-FP16-NEXT:    fcvtzu v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    umin v0.8h, v0.8h, v1.8h
+; CHECK-SD-FP16-NEXT:    xtn v0.8b, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v8f16_v8i1:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-CVT-NEXT:    movi v1.4s, #1
+; CHECK-GI-CVT-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    umin v2.4s, v2.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    uzp1 v0.8h, v2.8h, v0.8h
+; CHECK-GI-CVT-NEXT:    xtn v0.8b, v0.8h
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v8f16_v8i1:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    movi v1.8h, #1
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    umin v0.8h, v0.8h, v1.8h
+; CHECK-GI-FP16-NEXT:    xtn v0.8b, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <8 x i1> @llvm.fptoui.sat.v8f16.v8i1(<8 x half> %f)
     ret <8 x i1> %x
 }
 
 define <8 x i8> @test_unsigned_v8f16_v8i8(<8 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i8:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    movi v1.2d, #0x0000ff000000ff
-; CHECK-CVT-NEXT:    fcvtzu v2.4s, v2.4s
-; CHECK-CVT-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT:    umin v2.4s, v2.4s, v1.4s
-; CHECK-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-CVT-NEXT:    xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i8:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu v0.8h, v0.8h
-; CHECK-FP16-NEXT:    uqxtn v0.8b, v0.8h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_v8f16_v8i8:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
+; CHECK-SD-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    movi v1.2d, #0x0000ff000000ff
+; CHECK-SD-CVT-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-CVT-NEXT:    umin v2.4s, v2.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-SD-CVT-NEXT:    xtn v0.8b, v0.8h
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_v8f16_v8i8:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    uqxtn v0.8b, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v8f16_v8i8:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-CVT-NEXT:    movi v1.2d, #0x0000ff000000ff
+; CHECK-GI-CVT-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    umin v2.4s, v2.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    uzp1 v0.8h, v2.8h, v0.8h
+; CHECK-GI-CVT-NEXT:    xtn v0.8b, v0.8h
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v8f16_v8i8:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    movi v1.2d, #0xff00ff00ff00ff
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    umin v0.8h, v0.8h, v1.8h
+; CHECK-GI-FP16-NEXT:    xtn v0.8b, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <8 x i8> @llvm.fptoui.sat.v8f16.v8i8(<8 x half> %f)
     ret <8 x i8> %x
 }
 
 define <8 x i13> @test_unsigned_v8f16_v8i13(<8 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i13:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    movi v1.4s, #31, msl #8
-; CHECK-CVT-NEXT:    fcvtzu v2.4s, v2.4s
-; CHECK-CVT-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT:    umin v2.4s, v2.4s, v1.4s
-; CHECK-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i13:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu v0.8h, v0.8h
-; CHECK-FP16-NEXT:    mvni v1.8h, #224, lsl #8
-; CHECK-FP16-NEXT:    umin v0.8h, v0.8h, v1.8h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_v8f16_v8i13:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
+; CHECK-SD-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    movi v1.4s, #31, msl #8
+; CHECK-SD-CVT-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-CVT-NEXT:    umin v2.4s, v2.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_v8f16_v8i13:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    mvni v1.8h, #224, lsl #8
+; CHECK-SD-FP16-NEXT:    umin v0.8h, v0.8h, v1.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v8f16_v8i13:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-CVT-NEXT:    movi v1.4s, #31, msl #8
+; CHECK-GI-CVT-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    umin v2.4s, v2.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    uzp1 v0.8h, v2.8h, v0.8h
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v8f16_v8i13:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    mvni v1.8h, #224, lsl #8
+; CHECK-GI-FP16-NEXT:    umin v0.8h, v0.8h, v1.8h
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <8 x i13> @llvm.fptoui.sat.v8f16.v8i13(<8 x half> %f)
     ret <8 x i13> %x
 }
 
 define <8 x i16> @test_unsigned_v8f16_v8i16(<8 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-CVT-NEXT:    uqxtn v0.4h, v1.4s
-; CHECK-CVT-NEXT:    fcvtzu v1.4s, v2.4s
-; CHECK-CVT-NEXT:    uqxtn2 v0.8h, v1.4s
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu v0.8h, v0.8h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_v8f16_v8i16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
+; CHECK-SD-CVT-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    uqxtn v0.4h, v1.4s
+; CHECK-SD-CVT-NEXT:    fcvtzu v1.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    uqxtn2 v0.8h, v1.4s
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_v8f16_v8i16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v8f16_v8i16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-CVT-NEXT:    movi v1.2d, #0x00ffff0000ffff
+; CHECK-GI-CVT-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    umin v2.4s, v2.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    uzp1 v0.8h, v2.8h, v0.8h
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v8f16_v8i16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <8 x i16> @llvm.fptoui.sat.v8f16.v8i16(<8 x half> %f)
     ret <8 x i16> %x
 }
@@ -1819,327 +3171,562 @@ define <8 x i19> @test_unsigned_v8f16_v8i19(<8 x half> %f) {
 }
 
 define <8 x i32> @test_unsigned_v8f16_v8i32_duplicate(<8 x half> %f) {
-; CHECK-LABEL: test_unsigned_v8f16_v8i32_duplicate:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtl2 v1.4s, v0.8h
-; CHECK-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v8f16_v8i32_duplicate:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtl2 v1.4s, v0.8h
+; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v8f16_v8i32_duplicate:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-GI-NEXT:    fcvtl2 v2.4s, v0.8h
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v2.4s
+; CHECK-GI-NEXT:    ret
     %x = call <8 x i32> @llvm.fptoui.sat.v8f16.v8i32(<8 x half> %f)
     ret <8 x i32> %x
 }
 
 define <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i50:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-CVT-NEXT:    mov h5, v0.h[1]
-; CHECK-CVT-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
-; CHECK-CVT-NEXT:    mov h6, v0.h[2]
-; CHECK-CVT-NEXT:    mov h7, v0.h[3]
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov h2, v1.h[1]
-; CHECK-CVT-NEXT:    mov h3, v1.h[2]
-; CHECK-CVT-NEXT:    mov h4, v1.h[3]
-; CHECK-CVT-NEXT:    fcvt s1, h1
-; CHECK-CVT-NEXT:    fcvtzu x13, s0
-; CHECK-CVT-NEXT:    fcvt s2, h2
-; CHECK-CVT-NEXT:    fcvt s3, h3
-; CHECK-CVT-NEXT:    fcvt s4, h4
-; CHECK-CVT-NEXT:    fcvtzu x9, s1
-; CHECK-CVT-NEXT:    fcvt s1, h5
-; CHECK-CVT-NEXT:    fcvtzu x10, s2
-; CHECK-CVT-NEXT:    fcvtzu x11, s3
-; CHECK-CVT-NEXT:    fcvt s2, h6
-; CHECK-CVT-NEXT:    fcvtzu x12, s4
-; CHECK-CVT-NEXT:    fcvt s3, h7
-; CHECK-CVT-NEXT:    cmp x9, x8
-; CHECK-CVT-NEXT:    fcvtzu x14, s1
-; CHECK-CVT-NEXT:    csel x4, x9, x8, lo
-; CHECK-CVT-NEXT:    cmp x10, x8
-; CHECK-CVT-NEXT:    fcvtzu x9, s2
-; CHECK-CVT-NEXT:    csel x5, x10, x8, lo
-; CHECK-CVT-NEXT:    cmp x11, x8
-; CHECK-CVT-NEXT:    fcvtzu x10, s3
-; CHECK-CVT-NEXT:    csel x6, x11, x8, lo
-; CHECK-CVT-NEXT:    cmp x12, x8
-; CHECK-CVT-NEXT:    csel x7, x12, x8, lo
-; CHECK-CVT-NEXT:    cmp x13, x8
-; CHECK-CVT-NEXT:    csel x0, x13, x8, lo
-; CHECK-CVT-NEXT:    cmp x14, x8
-; CHECK-CVT-NEXT:    csel x1, x14, x8, lo
-; CHECK-CVT-NEXT:    cmp x9, x8
-; CHECK-CVT-NEXT:    csel x2, x9, x8, lo
-; CHECK-CVT-NEXT:    cmp x10, x8
-; CHECK-CVT-NEXT:    csel x3, x10, x8, lo
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i50:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-FP16-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
-; CHECK-FP16-NEXT:    fcvtzu x13, h0
-; CHECK-FP16-NEXT:    mov h2, v1.h[1]
-; CHECK-FP16-NEXT:    mov h3, v1.h[2]
-; CHECK-FP16-NEXT:    mov h4, v1.h[3]
-; CHECK-FP16-NEXT:    fcvtzu x9, h1
-; CHECK-FP16-NEXT:    mov h1, v0.h[1]
-; CHECK-FP16-NEXT:    fcvtzu x10, h2
-; CHECK-FP16-NEXT:    fcvtzu x11, h3
-; CHECK-FP16-NEXT:    mov h2, v0.h[2]
-; CHECK-FP16-NEXT:    fcvtzu x12, h4
-; CHECK-FP16-NEXT:    mov h3, v0.h[3]
-; CHECK-FP16-NEXT:    cmp x9, x8
-; CHECK-FP16-NEXT:    fcvtzu x14, h1
-; CHECK-FP16-NEXT:    csel x4, x9, x8, lo
-; CHECK-FP16-NEXT:    cmp x10, x8
-; CHECK-FP16-NEXT:    fcvtzu x9, h2
-; CHECK-FP16-NEXT:    csel x5, x10, x8, lo
-; CHECK-FP16-NEXT:    cmp x11, x8
-; CHECK-FP16-NEXT:    fcvtzu x10, h3
-; CHECK-FP16-NEXT:    csel x6, x11, x8, lo
-; CHECK-FP16-NEXT:    cmp x12, x8
-; CHECK-FP16-NEXT:    csel x7, x12, x8, lo
-; CHECK-FP16-NEXT:    cmp x13, x8
-; CHECK-FP16-NEXT:    csel x0, x13, x8, lo
-; CHECK-FP16-NEXT:    cmp x14, x8
-; CHECK-FP16-NEXT:    csel x1, x14, x8, lo
-; CHECK-FP16-NEXT:    cmp x9, x8
-; CHECK-FP16-NEXT:    csel x2, x9, x8, lo
-; CHECK-FP16-NEXT:    cmp x10, x8
-; CHECK-FP16-NEXT:    csel x3, x10, x8, lo
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_v8f16_v8i50:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-CVT-NEXT:    mov h5, v0.h[1]
+; CHECK-SD-CVT-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
+; CHECK-SD-CVT-NEXT:    mov h6, v0.h[2]
+; CHECK-SD-CVT-NEXT:    mov h7, v0.h[3]
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov h2, v1.h[1]
+; CHECK-SD-CVT-NEXT:    mov h3, v1.h[2]
+; CHECK-SD-CVT-NEXT:    mov h4, v1.h[3]
+; CHECK-SD-CVT-NEXT:    fcvt s1, h1
+; CHECK-SD-CVT-NEXT:    fcvtzu x13, s0
+; CHECK-SD-CVT-NEXT:    fcvt s2, h2
+; CHECK-SD-CVT-NEXT:    fcvt s3, h3
+; CHECK-SD-CVT-NEXT:    fcvt s4, h4
+; CHECK-SD-CVT-NEXT:    fcvtzu x9, s1
+; CHECK-SD-CVT-NEXT:    fcvt s1, h5
+; CHECK-SD-CVT-NEXT:    fcvtzu x10, s2
+; CHECK-SD-CVT-NEXT:    fcvtzu x11, s3
+; CHECK-SD-CVT-NEXT:    fcvt s2, h6
+; CHECK-SD-CVT-NEXT:    fcvtzu x12, s4
+; CHECK-SD-CVT-NEXT:    fcvt s3, h7
+; CHECK-SD-CVT-NEXT:    cmp x9, x8
+; CHECK-SD-CVT-NEXT:    fcvtzu x14, s1
+; CHECK-SD-CVT-NEXT:    csel x4, x9, x8, lo
+; CHECK-SD-CVT-NEXT:    cmp x10, x8
+; CHECK-SD-CVT-NEXT:    fcvtzu x9, s2
+; CHECK-SD-CVT-NEXT:    csel x5, x10, x8, lo
+; CHECK-SD-CVT-NEXT:    cmp x11, x8
+; CHECK-SD-CVT-NEXT:    fcvtzu x10, s3
+; CHECK-SD-CVT-NEXT:    csel x6, x11, x8, lo
+; CHECK-SD-CVT-NEXT:    cmp x12, x8
+; CHECK-SD-CVT-NEXT:    csel x7, x12, x8, lo
+; CHECK-SD-CVT-NEXT:    cmp x13, x8
+; CHECK-SD-CVT-NEXT:    csel x0, x13, x8, lo
+; CHECK-SD-CVT-NEXT:    cmp x14, x8
+; CHECK-SD-CVT-NEXT:    csel x1, x14, x8, lo
+; CHECK-SD-CVT-NEXT:    cmp x9, x8
+; CHECK-SD-CVT-NEXT:    csel x2, x9, x8, lo
+; CHECK-SD-CVT-NEXT:    cmp x10, x8
+; CHECK-SD-CVT-NEXT:    csel x3, x10, x8, lo
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_v8f16_v8i50:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-FP16-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
+; CHECK-SD-FP16-NEXT:    fcvtzu x13, h0
+; CHECK-SD-FP16-NEXT:    mov h2, v1.h[1]
+; CHECK-SD-FP16-NEXT:    mov h3, v1.h[2]
+; CHECK-SD-FP16-NEXT:    mov h4, v1.h[3]
+; CHECK-SD-FP16-NEXT:    fcvtzu x9, h1
+; CHECK-SD-FP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-FP16-NEXT:    fcvtzu x10, h2
+; CHECK-SD-FP16-NEXT:    fcvtzu x11, h3
+; CHECK-SD-FP16-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-FP16-NEXT:    fcvtzu x12, h4
+; CHECK-SD-FP16-NEXT:    mov h3, v0.h[3]
+; CHECK-SD-FP16-NEXT:    cmp x9, x8
+; CHECK-SD-FP16-NEXT:    fcvtzu x14, h1
+; CHECK-SD-FP16-NEXT:    csel x4, x9, x8, lo
+; CHECK-SD-FP16-NEXT:    cmp x10, x8
+; CHECK-SD-FP16-NEXT:    fcvtzu x9, h2
+; CHECK-SD-FP16-NEXT:    csel x5, x10, x8, lo
+; CHECK-SD-FP16-NEXT:    cmp x11, x8
+; CHECK-SD-FP16-NEXT:    fcvtzu x10, h3
+; CHECK-SD-FP16-NEXT:    csel x6, x11, x8, lo
+; CHECK-SD-FP16-NEXT:    cmp x12, x8
+; CHECK-SD-FP16-NEXT:    csel x7, x12, x8, lo
+; CHECK-SD-FP16-NEXT:    cmp x13, x8
+; CHECK-SD-FP16-NEXT:    csel x0, x13, x8, lo
+; CHECK-SD-FP16-NEXT:    cmp x14, x8
+; CHECK-SD-FP16-NEXT:    csel x1, x14, x8, lo
+; CHECK-SD-FP16-NEXT:    cmp x9, x8
+; CHECK-SD-FP16-NEXT:    csel x2, x9, x8, lo
+; CHECK-SD-FP16-NEXT:    cmp x10, x8
+; CHECK-SD-FP16-NEXT:    csel x3, x10, x8, lo
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v8f16_v8i50:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-GI-CVT-NEXT:    mov h5, v0.h[1]
+; CHECK-GI-CVT-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
+; CHECK-GI-CVT-NEXT:    mov h6, v0.h[2]
+; CHECK-GI-CVT-NEXT:    mov h7, v0.h[3]
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov h2, v1.h[1]
+; CHECK-GI-CVT-NEXT:    mov h3, v1.h[2]
+; CHECK-GI-CVT-NEXT:    mov h4, v1.h[3]
+; CHECK-GI-CVT-NEXT:    fcvt s1, h1
+; CHECK-GI-CVT-NEXT:    fcvtzu x13, s0
+; CHECK-GI-CVT-NEXT:    fcvt s2, h2
+; CHECK-GI-CVT-NEXT:    fcvt s3, h3
+; CHECK-GI-CVT-NEXT:    fcvt s4, h4
+; CHECK-GI-CVT-NEXT:    fcvtzu x9, s1
+; CHECK-GI-CVT-NEXT:    fcvt s1, h5
+; CHECK-GI-CVT-NEXT:    fcvtzu x10, s2
+; CHECK-GI-CVT-NEXT:    fcvtzu x11, s3
+; CHECK-GI-CVT-NEXT:    fcvt s2, h6
+; CHECK-GI-CVT-NEXT:    fcvtzu x12, s4
+; CHECK-GI-CVT-NEXT:    fcvt s3, h7
+; CHECK-GI-CVT-NEXT:    cmp x9, x8
+; CHECK-GI-CVT-NEXT:    fcvtzu x14, s1
+; CHECK-GI-CVT-NEXT:    csel x4, x9, x8, lo
+; CHECK-GI-CVT-NEXT:    cmp x10, x8
+; CHECK-GI-CVT-NEXT:    fcvtzu x9, s2
+; CHECK-GI-CVT-NEXT:    csel x5, x10, x8, lo
+; CHECK-GI-CVT-NEXT:    cmp x11, x8
+; CHECK-GI-CVT-NEXT:    fcvtzu x10, s3
+; CHECK-GI-CVT-NEXT:    csel x6, x11, x8, lo
+; CHECK-GI-CVT-NEXT:    cmp x12, x8
+; CHECK-GI-CVT-NEXT:    csel x7, x12, x8, lo
+; CHECK-GI-CVT-NEXT:    cmp x13, x8
+; CHECK-GI-CVT-NEXT:    csel x0, x13, x8, lo
+; CHECK-GI-CVT-NEXT:    cmp x14, x8
+; CHECK-GI-CVT-NEXT:    csel x1, x14, x8, lo
+; CHECK-GI-CVT-NEXT:    cmp x9, x8
+; CHECK-GI-CVT-NEXT:    csel x2, x9, x8, lo
+; CHECK-GI-CVT-NEXT:    cmp x10, x8
+; CHECK-GI-CVT-NEXT:    csel x3, x10, x8, lo
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v8f16_v8i50:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-GI-FP16-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
+; CHECK-GI-FP16-NEXT:    fcvtzu x13, h0
+; CHECK-GI-FP16-NEXT:    mov h2, v1.h[1]
+; CHECK-GI-FP16-NEXT:    mov h3, v1.h[2]
+; CHECK-GI-FP16-NEXT:    mov h4, v1.h[3]
+; CHECK-GI-FP16-NEXT:    fcvtzu x9, h1
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT:    fcvtzu x10, h2
+; CHECK-GI-FP16-NEXT:    fcvtzu x11, h3
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT:    fcvtzu x12, h4
+; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT:    cmp x9, x8
+; CHECK-GI-FP16-NEXT:    fcvtzu x14, h1
+; CHECK-GI-FP16-NEXT:    csel x4, x9, x8, lo
+; CHECK-GI-FP16-NEXT:    cmp x10, x8
+; CHECK-GI-FP16-NEXT:    fcvtzu x9, h2
+; CHECK-GI-FP16-NEXT:    csel x5, x10, x8, lo
+; CHECK-GI-FP16-NEXT:    cmp x11, x8
+; CHECK-GI-FP16-NEXT:    fcvtzu x10, h3
+; CHECK-GI-FP16-NEXT:    csel x6, x11, x8, lo
+; CHECK-GI-FP16-NEXT:    cmp x12, x8
+; CHECK-GI-FP16-NEXT:    csel x7, x12, x8, lo
+; CHECK-GI-FP16-NEXT:    cmp x13, x8
+; CHECK-GI-FP16-NEXT:    csel x0, x13, x8, lo
+; CHECK-GI-FP16-NEXT:    cmp x14, x8
+; CHECK-GI-FP16-NEXT:    csel x1, x14, x8, lo
+; CHECK-GI-FP16-NEXT:    cmp x9, x8
+; CHECK-GI-FP16-NEXT:    csel x2, x9, x8, lo
+; CHECK-GI-FP16-NEXT:    cmp x10, x8
+; CHECK-GI-FP16-NEXT:    csel x3, x10, x8, lo
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <8 x i50> @llvm.fptoui.sat.v8f16.v8i50(<8 x half> %f)
     ret <8 x i50> %x
 }
 
 define <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i64:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-CVT-NEXT:    mov h4, v0.h[2]
-; CHECK-CVT-NEXT:    mov h3, v0.h[1]
-; CHECK-CVT-NEXT:    mov h7, v0.h[3]
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov h2, v1.h[2]
-; CHECK-CVT-NEXT:    mov h5, v1.h[1]
-; CHECK-CVT-NEXT:    mov h6, v1.h[3]
-; CHECK-CVT-NEXT:    fcvt s1, h1
-; CHECK-CVT-NEXT:    fcvt s4, h4
-; CHECK-CVT-NEXT:    fcvt s3, h3
-; CHECK-CVT-NEXT:    fcvt s7, h7
-; CHECK-CVT-NEXT:    fcvtzu x9, s0
-; CHECK-CVT-NEXT:    fcvt s2, h2
-; CHECK-CVT-NEXT:    fcvt s5, h5
-; CHECK-CVT-NEXT:    fcvt s6, h6
-; CHECK-CVT-NEXT:    fcvtzu x8, s1
-; CHECK-CVT-NEXT:    fcvtzu x12, s4
-; CHECK-CVT-NEXT:    fcvtzu x11, s3
-; CHECK-CVT-NEXT:    fcvtzu x15, s7
-; CHECK-CVT-NEXT:    fmov d0, x9
-; CHECK-CVT-NEXT:    fcvtzu x10, s2
-; CHECK-CVT-NEXT:    fcvtzu x13, s5
-; CHECK-CVT-NEXT:    fcvtzu x14, s6
-; CHECK-CVT-NEXT:    fmov d2, x8
-; CHECK-CVT-NEXT:    fmov d1, x12
-; CHECK-CVT-NEXT:    mov v0.d[1], x11
-; CHECK-CVT-NEXT:    fmov d3, x10
-; CHECK-CVT-NEXT:    mov v2.d[1], x13
-; CHECK-CVT-NEXT:    mov v1.d[1], x15
-; CHECK-CVT-NEXT:    mov v3.d[1], x14
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i64:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-FP16-NEXT:    mov h4, v0.h[2]
-; CHECK-FP16-NEXT:    mov h3, v0.h[1]
-; CHECK-FP16-NEXT:    mov h7, v0.h[3]
-; CHECK-FP16-NEXT:    fcvtzu x9, h0
-; CHECK-FP16-NEXT:    mov h2, v1.h[2]
-; CHECK-FP16-NEXT:    mov h5, v1.h[1]
-; CHECK-FP16-NEXT:    mov h6, v1.h[3]
-; CHECK-FP16-NEXT:    fcvtzu x8, h1
-; CHECK-FP16-NEXT:    fcvtzu x12, h4
-; CHECK-FP16-NEXT:    fcvtzu x11, h3
-; CHECK-FP16-NEXT:    fcvtzu x15, h7
-; CHECK-FP16-NEXT:    fmov d0, x9
-; CHECK-FP16-NEXT:    fcvtzu x10, h2
-; CHECK-FP16-NEXT:    fcvtzu x13, h5
-; CHECK-FP16-NEXT:    fcvtzu x14, h6
-; CHECK-FP16-NEXT:    fmov d2, x8
-; CHECK-FP16-NEXT:    fmov d1, x12
-; CHECK-FP16-NEXT:    mov v0.d[1], x11
-; CHECK-FP16-NEXT:    fmov d3, x10
-; CHECK-FP16-NEXT:    mov v2.d[1], x13
-; CHECK-FP16-NEXT:    mov v1.d[1], x15
-; CHECK-FP16-NEXT:    mov v3.d[1], x14
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_v8f16_v8i64:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-CVT-NEXT:    mov h4, v0.h[2]
+; CHECK-SD-CVT-NEXT:    mov h3, v0.h[1]
+; CHECK-SD-CVT-NEXT:    mov h7, v0.h[3]
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov h2, v1.h[2]
+; CHECK-SD-CVT-NEXT:    mov h5, v1.h[1]
+; CHECK-SD-CVT-NEXT:    mov h6, v1.h[3]
+; CHECK-SD-CVT-NEXT:    fcvt s1, h1
+; CHECK-SD-CVT-NEXT:    fcvt s4, h4
+; CHECK-SD-CVT-NEXT:    fcvt s3, h3
+; CHECK-SD-CVT-NEXT:    fcvt s7, h7
+; CHECK-SD-CVT-NEXT:    fcvtzu x9, s0
+; CHECK-SD-CVT-NEXT:    fcvt s2, h2
+; CHECK-SD-CVT-NEXT:    fcvt s5, h5
+; CHECK-SD-CVT-NEXT:    fcvt s6, h6
+; CHECK-SD-CVT-NEXT:    fcvtzu x8, s1
+; CHECK-SD-CVT-NEXT:    fcvtzu x12, s4
+; CHECK-SD-CVT-NEXT:    fcvtzu x11, s3
+; CHECK-SD-CVT-NEXT:    fcvtzu x15, s7
+; CHECK-SD-CVT-NEXT:    fmov d0, x9
+; CHECK-SD-CVT-NEXT:    fcvtzu x10, s2
+; CHECK-SD-CVT-NEXT:    fcvtzu x13, s5
+; CHECK-SD-CVT-NEXT:    fcvtzu x14, s6
+; CHECK-SD-CVT-NEXT:    fmov d2, x8
+; CHECK-SD-CVT-NEXT:    fmov d1, x12
+; CHECK-SD-CVT-NEXT:    mov v0.d[1], x11
+; CHECK-SD-CVT-NEXT:    fmov d3, x10
+; CHECK-SD-CVT-NEXT:    mov v2.d[1], x13
+; CHECK-SD-CVT-NEXT:    mov v1.d[1], x15
+; CHECK-SD-CVT-NEXT:    mov v3.d[1], x14
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_v8f16_v8i64:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-FP16-NEXT:    mov h4, v0.h[2]
+; CHECK-SD-FP16-NEXT:    mov h3, v0.h[1]
+; CHECK-SD-FP16-NEXT:    mov h7, v0.h[3]
+; CHECK-SD-FP16-NEXT:    fcvtzu x9, h0
+; CHECK-SD-FP16-NEXT:    mov h2, v1.h[2]
+; CHECK-SD-FP16-NEXT:    mov h5, v1.h[1]
+; CHECK-SD-FP16-NEXT:    mov h6, v1.h[3]
+; CHECK-SD-FP16-NEXT:    fcvtzu x8, h1
+; CHECK-SD-FP16-NEXT:    fcvtzu x12, h4
+; CHECK-SD-FP16-NEXT:    fcvtzu x11, h3
+; CHECK-SD-FP16-NEXT:    fcvtzu x15, h7
+; CHECK-SD-FP16-NEXT:    fmov d0, x9
+; CHECK-SD-FP16-NEXT:    fcvtzu x10, h2
+; CHECK-SD-FP16-NEXT:    fcvtzu x13, h5
+; CHECK-SD-FP16-NEXT:    fcvtzu x14, h6
+; CHECK-SD-FP16-NEXT:    fmov d2, x8
+; CHECK-SD-FP16-NEXT:    fmov d1, x12
+; CHECK-SD-FP16-NEXT:    mov v0.d[1], x11
+; CHECK-SD-FP16-NEXT:    fmov d3, x10
+; CHECK-SD-FP16-NEXT:    mov v2.d[1], x13
+; CHECK-SD-FP16-NEXT:    mov v1.d[1], x15
+; CHECK-SD-FP16-NEXT:    mov v3.d[1], x14
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v8f16_v8i64:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-CVT-NEXT:    fcvtl v2.2d, v1.2s
+; CHECK-GI-CVT-NEXT:    fcvtl2 v1.2d, v1.4s
+; CHECK-GI-CVT-NEXT:    fcvtl v3.2d, v0.2s
+; CHECK-GI-CVT-NEXT:    fcvtl2 v4.2d, v0.4s
+; CHECK-GI-CVT-NEXT:    fcvtzu v0.2d, v2.2d
+; CHECK-GI-CVT-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-CVT-NEXT:    fcvtzu v2.2d, v3.2d
+; CHECK-GI-CVT-NEXT:    fcvtzu v3.2d, v4.2d
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v8f16_v8i64:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-FP16-NEXT:    mov s2, v0.s[2]
+; CHECK-GI-FP16-NEXT:    mov s3, v0.s[3]
+; CHECK-GI-FP16-NEXT:    mov h4, v0.h[1]
+; CHECK-GI-FP16-NEXT:    fcvt d0, h0
+; CHECK-GI-FP16-NEXT:    mov h5, v1.h[1]
+; CHECK-GI-FP16-NEXT:    mov h6, v2.h[1]
+; CHECK-GI-FP16-NEXT:    mov h7, v3.h[1]
+; CHECK-GI-FP16-NEXT:    fcvt d4, h4
+; CHECK-GI-FP16-NEXT:    fcvt d1, h1
+; CHECK-GI-FP16-NEXT:    fcvt d2, h2
+; CHECK-GI-FP16-NEXT:    fcvt d3, h3
+; CHECK-GI-FP16-NEXT:    fcvt d5, h5
+; CHECK-GI-FP16-NEXT:    fcvt d6, h6
+; CHECK-GI-FP16-NEXT:    fcvt d7, h7
+; CHECK-GI-FP16-NEXT:    mov v0.d[1], v4.d[0]
+; CHECK-GI-FP16-NEXT:    mov v1.d[1], v5.d[0]
+; CHECK-GI-FP16-NEXT:    mov v2.d[1], v6.d[0]
+; CHECK-GI-FP16-NEXT:    mov v3.d[1], v7.d[0]
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <8 x i64> @llvm.fptoui.sat.v8f16.v8i64(<8 x half> %f)
     ret <8 x i64> %x
 }
 
 define <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
-; CHECK-LABEL: test_unsigned_v8f16_v8i100:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #176
-; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x29, x30, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x28, x27, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x26, x25, [sp, #112] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x24, x23, [sp, #128] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 176
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w22, -32
-; CHECK-NEXT:    .cfi_offset w23, -40
-; CHECK-NEXT:    .cfi_offset w24, -48
-; CHECK-NEXT:    .cfi_offset w25, -56
-; CHECK-NEXT:    .cfi_offset w26, -64
-; CHECK-NEXT:    .cfi_offset w27, -72
-; CHECK-NEXT:    .cfi_offset w28, -80
-; CHECK-NEXT:    .cfi_offset w30, -88
-; CHECK-NEXT:    .cfi_offset w29, -96
-; CHECK-NEXT:    .cfi_offset b8, -104
-; CHECK-NEXT:    .cfi_offset b9, -112
-; CHECK-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    mov x19, x8
-; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    mov h0, v0.h[1]
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w8, #1904214015 // =0x717fffff
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    fmov s9, w8
-; CHECK-NEXT:    mov x23, #68719476735 // =0xfffffffff
-; CHECK-NEXT:    mov h0, v0.h[3]
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x10, x23, x8, gt
-; CHECK-NEXT:    csinv x8, x9, xzr, le
-; CHECK-NEXT:    stp x8, x10, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x9, x23, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    stp x8, x9, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov h0, v0.h[2]
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    csel x25, x23, x9, gt
-; CHECK-NEXT:    str x8, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov h0, v0.h[1]
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x26, x23, x9, gt
-; CHECK-NEXT:    csinv x28, x8, xzr, le
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov h0, v0.h[3]
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x29, x23, x9, gt
-; CHECK-NEXT:    csinv x20, x8, xzr, le
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x21, x23, x9, gt
-; CHECK-NEXT:    csinv x27, x8, xzr, le
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov h0, v0.h[2]
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x22, x23, x9, gt
-; CHECK-NEXT:    csinv x24, x8, xzr, le
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    extr x8, x21, x27, #28
-; CHECK-NEXT:    extr x9, x29, x20, #28
-; CHECK-NEXT:    stur x28, [x19, #75]
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    bfi x22, x20, #36, #28
-; CHECK-NEXT:    lsr x11, x29, #28
-; CHECK-NEXT:    stur x8, [x19, #41]
-; CHECK-NEXT:    str x9, [x19, #16]
-; CHECK-NEXT:    ldr x10, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    stp x24, x22, [x19]
-; CHECK-NEXT:    stur x10, [x19, #50]
-; CHECK-NEXT:    lsr x10, x21, #28
-; CHECK-NEXT:    strb w11, [x19, #24]
-; CHECK-NEXT:    strb w10, [x19, #49]
-; CHECK-NEXT:    csel x9, x23, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    ldp x12, x11, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    bfi x9, x27, #36, #28
-; CHECK-NEXT:    stur x8, [x19, #25]
-; CHECK-NEXT:    stur x9, [x19, #33]
-; CHECK-NEXT:    extr x10, x11, x12, #28
-; CHECK-NEXT:    bfi x26, x12, #36, #28
-; CHECK-NEXT:    stur x10, [x19, #91]
-; CHECK-NEXT:    ldp x10, x9, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    stur x26, [x19, #83]
-; CHECK-NEXT:    extr x8, x9, x10, #28
-; CHECK-NEXT:    bfi x25, x10, #36, #28
-; CHECK-NEXT:    lsr x9, x9, #28
-; CHECK-NEXT:    stur x8, [x19, #66]
-; CHECK-NEXT:    lsr x8, x11, #28
-; CHECK-NEXT:    stur x25, [x19, #58]
-; CHECK-NEXT:    strb w8, [x19, #99]
-; CHECK-NEXT:    strb w9, [x19, #74]
-; CHECK-NEXT:    ldp x20, x19, [sp, #160] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x22, x21, [sp, #144] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x24, x23, [sp, #128] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x26, x25, [sp, #112] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x28, x27, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x29, x30, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    add sp, sp, #176
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v8f16_v8i100:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #176
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x29, x30, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x28, x27, [sp, #96] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x26, x25, [sp, #112] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x24, x23, [sp, #128] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 176
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w23, -40
+; CHECK-SD-NEXT:    .cfi_offset w24, -48
+; CHECK-SD-NEXT:    .cfi_offset w25, -56
+; CHECK-SD-NEXT:    .cfi_offset w26, -64
+; CHECK-SD-NEXT:    .cfi_offset w27, -72
+; CHECK-SD-NEXT:    .cfi_offset w28, -80
+; CHECK-SD-NEXT:    .cfi_offset w30, -88
+; CHECK-SD-NEXT:    .cfi_offset w29, -96
+; CHECK-SD-NEXT:    .cfi_offset b8, -104
+; CHECK-SD-NEXT:    .cfi_offset b9, -112
+; CHECK-SD-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    mov x19, x8
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    mov h0, v0.h[1]
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w8, #1904214015 // =0x717fffff
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    fmov s9, w8
+; CHECK-SD-NEXT:    mov x23, #68719476735 // =0xfffffffff
+; CHECK-SD-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x10, x23, x8, gt
+; CHECK-SD-NEXT:    csinv x8, x9, xzr, le
+; CHECK-SD-NEXT:    stp x8, x10, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x9, x23, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    stp x8, x9, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov h0, v0.h[2]
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    csel x25, x23, x9, gt
+; CHECK-SD-NEXT:    str x8, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov h0, v0.h[1]
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x26, x23, x9, gt
+; CHECK-SD-NEXT:    csinv x28, x8, xzr, le
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x29, x23, x9, gt
+; CHECK-SD-NEXT:    csinv x20, x8, xzr, le
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x21, x23, x9, gt
+; CHECK-SD-NEXT:    csinv x27, x8, xzr, le
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov h0, v0.h[2]
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x22, x23, x9, gt
+; CHECK-SD-NEXT:    csinv x24, x8, xzr, le
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    extr x8, x21, x27, #28
+; CHECK-SD-NEXT:    extr x9, x29, x20, #28
+; CHECK-SD-NEXT:    stur x28, [x19, #75]
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    bfi x22, x20, #36, #28
+; CHECK-SD-NEXT:    lsr x11, x29, #28
+; CHECK-SD-NEXT:    stur x8, [x19, #41]
+; CHECK-SD-NEXT:    str x9, [x19, #16]
+; CHECK-SD-NEXT:    ldr x10, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    stp x24, x22, [x19]
+; CHECK-SD-NEXT:    stur x10, [x19, #50]
+; CHECK-SD-NEXT:    lsr x10, x21, #28
+; CHECK-SD-NEXT:    strb w11, [x19, #24]
+; CHECK-SD-NEXT:    strb w10, [x19, #49]
+; CHECK-SD-NEXT:    csel x9, x23, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    ldp x12, x11, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    bfi x9, x27, #36, #28
+; CHECK-SD-NEXT:    stur x8, [x19, #25]
+; CHECK-SD-NEXT:    stur x9, [x19, #33]
+; CHECK-SD-NEXT:    extr x10, x11, x12, #28
+; CHECK-SD-NEXT:    bfi x26, x12, #36, #28
+; CHECK-SD-NEXT:    stur x10, [x19, #91]
+; CHECK-SD-NEXT:    ldp x10, x9, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    stur x26, [x19, #83]
+; CHECK-SD-NEXT:    extr x8, x9, x10, #28
+; CHECK-SD-NEXT:    bfi x25, x10, #36, #28
+; CHECK-SD-NEXT:    lsr x9, x9, #28
+; CHECK-SD-NEXT:    stur x8, [x19, #66]
+; CHECK-SD-NEXT:    lsr x8, x11, #28
+; CHECK-SD-NEXT:    stur x25, [x19, #58]
+; CHECK-SD-NEXT:    strb w8, [x19, #99]
+; CHECK-SD-NEXT:    strb w9, [x19, #74]
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #160] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #144] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x24, x23, [sp, #128] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x26, x25, [sp, #112] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x28, x27, [sp, #96] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x29, x30, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    add sp, sp, #176
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v8f16_v8i100:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-CVT-NEXT:    mov x11, x8
+; CHECK-GI-CVT-NEXT:    fcvt s3, h0
+; CHECK-GI-CVT-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-CVT-NEXT:    str wzr, [x8, #8]
+; CHECK-GI-CVT-NEXT:    strb wzr, [x8, #12]
+; CHECK-GI-CVT-NEXT:    fcvt s1, h1
+; CHECK-GI-CVT-NEXT:    fcvt s2, h2
+; CHECK-GI-CVT-NEXT:    fcvtzu x9, s3
+; CHECK-GI-CVT-NEXT:    fcvt s3, h4
+; CHECK-GI-CVT-NEXT:    fcvtzu x10, s1
+; CHECK-GI-CVT-NEXT:    mov h1, v0.h[4]
+; CHECK-GI-CVT-NEXT:    fcvtzu x12, s2
+; CHECK-GI-CVT-NEXT:    mov h2, v0.h[5]
+; CHECK-GI-CVT-NEXT:    str x9, [x8]
+; CHECK-GI-CVT-NEXT:    mov x9, x8
+; CHECK-GI-CVT-NEXT:    fcvt s1, h1
+; CHECK-GI-CVT-NEXT:    str x10, [x11, #12]!
+; CHECK-GI-CVT-NEXT:    fcvtzu x10, s3
+; CHECK-GI-CVT-NEXT:    mov h3, v0.h[6]
+; CHECK-GI-CVT-NEXT:    fcvt s2, h2
+; CHECK-GI-CVT-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-CVT-NEXT:    str wzr, [x11, #8]
+; CHECK-GI-CVT-NEXT:    strb wzr, [x11, #12]
+; CHECK-GI-CVT-NEXT:    mov x11, x8
+; CHECK-GI-CVT-NEXT:    str x12, [x9, #25]!
+; CHECK-GI-CVT-NEXT:    fcvtzu x12, s1
+; CHECK-GI-CVT-NEXT:    str wzr, [x9, #8]
+; CHECK-GI-CVT-NEXT:    fcvt s1, h3
+; CHECK-GI-CVT-NEXT:    strb wzr, [x9, #12]
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov x9, x8
+; CHECK-GI-CVT-NEXT:    str x10, [x11, #37]!
+; CHECK-GI-CVT-NEXT:    fcvtzu x10, s2
+; CHECK-GI-CVT-NEXT:    str wzr, [x11, #8]
+; CHECK-GI-CVT-NEXT:    strb wzr, [x11, #12]
+; CHECK-GI-CVT-NEXT:    fcvtzu x11, s1
+; CHECK-GI-CVT-NEXT:    str x12, [x9, #50]!
+; CHECK-GI-CVT-NEXT:    str wzr, [x9, #8]
+; CHECK-GI-CVT-NEXT:    strb wzr, [x9, #12]
+; CHECK-GI-CVT-NEXT:    mov x9, x8
+; CHECK-GI-CVT-NEXT:    str x10, [x9, #62]!
+; CHECK-GI-CVT-NEXT:    fcvtzu x10, s0
+; CHECK-GI-CVT-NEXT:    str wzr, [x9, #8]
+; CHECK-GI-CVT-NEXT:    strb wzr, [x9, #12]
+; CHECK-GI-CVT-NEXT:    mov x9, x8
+; CHECK-GI-CVT-NEXT:    str x11, [x9, #75]!
+; CHECK-GI-CVT-NEXT:    str wzr, [x9, #8]
+; CHECK-GI-CVT-NEXT:    strb wzr, [x9, #12]
+; CHECK-GI-CVT-NEXT:    str x10, [x8, #87]!
+; CHECK-GI-CVT-NEXT:    str wzr, [x8, #8]
+; CHECK-GI-CVT-NEXT:    strb wzr, [x8, #12]
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v8f16_v8i100:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT:    mov x11, x8
+; CHECK-GI-FP16-NEXT:    fcvtzu x9, h0
+; CHECK-GI-FP16-NEXT:    str wzr, [x8, #8]
+; CHECK-GI-FP16-NEXT:    strb wzr, [x8, #12]
+; CHECK-GI-FP16-NEXT:    fcvtzu x10, h1
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[3]
+; CHECK-GI-FP16-NEXT:    fcvtzu x12, h2
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[4]
+; CHECK-GI-FP16-NEXT:    str x9, [x8]
+; CHECK-GI-FP16-NEXT:    mov x9, x8
+; CHECK-GI-FP16-NEXT:    str x10, [x11, #12]!
+; CHECK-GI-FP16-NEXT:    fcvtzu x10, h1
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[5]
+; CHECK-GI-FP16-NEXT:    str wzr, [x11, #8]
+; CHECK-GI-FP16-NEXT:    strb wzr, [x11, #12]
+; CHECK-GI-FP16-NEXT:    mov x11, x8
+; CHECK-GI-FP16-NEXT:    str x12, [x9, #25]!
+; CHECK-GI-FP16-NEXT:    fcvtzu x12, h2
+; CHECK-GI-FP16-NEXT:    str wzr, [x9, #8]
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[6]
+; CHECK-GI-FP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-FP16-NEXT:    strb wzr, [x9, #12]
+; CHECK-GI-FP16-NEXT:    fcvtzu x9, h1
+; CHECK-GI-FP16-NEXT:    str x10, [x11, #37]!
+; CHECK-GI-FP16-NEXT:    mov x10, x8
+; CHECK-GI-FP16-NEXT:    str wzr, [x11, #8]
+; CHECK-GI-FP16-NEXT:    strb wzr, [x11, #12]
+; CHECK-GI-FP16-NEXT:    fcvtzu x11, h2
+; CHECK-GI-FP16-NEXT:    str x12, [x10, #50]!
+; CHECK-GI-FP16-NEXT:    str wzr, [x10, #8]
+; CHECK-GI-FP16-NEXT:    strb wzr, [x10, #12]
+; CHECK-GI-FP16-NEXT:    mov x10, x8
+; CHECK-GI-FP16-NEXT:    str x9, [x10, #62]!
+; CHECK-GI-FP16-NEXT:    fcvtzu x9, h0
+; CHECK-GI-FP16-NEXT:    str wzr, [x10, #8]
+; CHECK-GI-FP16-NEXT:    strb wzr, [x10, #12]
+; CHECK-GI-FP16-NEXT:    mov x10, x8
+; CHECK-GI-FP16-NEXT:    str x11, [x10, #75]!
+; CHECK-GI-FP16-NEXT:    str wzr, [x10, #8]
+; CHECK-GI-FP16-NEXT:    strb wzr, [x10, #12]
+; CHECK-GI-FP16-NEXT:    str x9, [x8, #87]!
+; CHECK-GI-FP16-NEXT:    str wzr, [x8, #8]
+; CHECK-GI-FP16-NEXT:    strb wzr, [x8, #12]
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <8 x i100> @llvm.fptoui.sat.v8f16.v8i100(<8 x half> %f)
     ret <8 x i100> %x
 }
@@ -2305,64 +3892,116 @@ declare <16 x i8> @llvm.fptoui.sat.v16f64.v16i8(<16 x double> %f)
 declare <16 x i16> @llvm.fptoui.sat.v16f64.v16i16(<16 x double> %f)
 
 define <8 x i8> @test_unsigned_v8f32_v8i8(<8 x float> %f) {
-; CHECK-LABEL: test_unsigned_v8f32_v8i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v2.2d, #0x0000ff000000ff
-; CHECK-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    umin v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    umin v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    xtn v0.8b, v0.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v8f32_v8i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v2.2d, #0x0000ff000000ff
+; CHECK-SD-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    umin v1.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT:    umin v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    xtn v0.8b, v0.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v8f32_v8i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v2.2d, #0x0000ff000000ff
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-GI-NEXT:    umin v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT:    umin v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    xtn v0.8b, v0.8h
+; CHECK-GI-NEXT:    ret
     %x = call <8 x i8> @llvm.fptoui.sat.v8f32.v8i8(<8 x float> %f)
     ret <8 x i8> %x
 }
 
 define <16 x i8> @test_unsigned_v16f32_v16i8(<16 x float> %f) {
-; CHECK-LABEL: test_unsigned_v16f32_v16i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v4.2d, #0x0000ff000000ff
-; CHECK-NEXT:    fcvtzu v3.4s, v3.4s
-; CHECK-NEXT:    fcvtzu v2.4s, v2.4s
-; CHECK-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    umin v3.4s, v3.4s, v4.4s
-; CHECK-NEXT:    umin v2.4s, v2.4s, v4.4s
-; CHECK-NEXT:    umin v1.4s, v1.4s, v4.4s
-; CHECK-NEXT:    umin v0.4s, v0.4s, v4.4s
-; CHECK-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v16f32_v16i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v4.2d, #0x0000ff000000ff
+; CHECK-SD-NEXT:    fcvtzu v3.4s, v3.4s
+; CHECK-SD-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-SD-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    umin v3.4s, v3.4s, v4.4s
+; CHECK-SD-NEXT:    umin v2.4s, v2.4s, v4.4s
+; CHECK-SD-NEXT:    umin v1.4s, v1.4s, v4.4s
+; CHECK-SD-NEXT:    umin v0.4s, v0.4s, v4.4s
+; CHECK-SD-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v16f32_v16i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v4.2d, #0x0000ff000000ff
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-GI-NEXT:    fcvtzu v3.4s, v3.4s
+; CHECK-GI-NEXT:    umin v0.4s, v0.4s, v4.4s
+; CHECK-GI-NEXT:    umin v1.4s, v1.4s, v4.4s
+; CHECK-GI-NEXT:    umin v2.4s, v2.4s, v4.4s
+; CHECK-GI-NEXT:    umin v3.4s, v3.4s, v4.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    ret
     %x = call <16 x i8> @llvm.fptoui.sat.v16f32.v16i8(<16 x float> %f)
     ret <16 x i8> %x
 }
 
 define <8 x i16> @test_unsigned_v8f32_v8i16(<8 x float> %f) {
-; CHECK-LABEL: test_unsigned_v8f32_v8i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-NEXT:    uqxtn v0.4h, v0.4s
-; CHECK-NEXT:    uqxtn2 v0.8h, v1.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v8f32_v8i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-SD-NEXT:    uqxtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    uqxtn2 v0.8h, v1.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v8f32_v8i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v2.2d, #0x00ffff0000ffff
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-GI-NEXT:    umin v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT:    umin v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
     %x = call <8 x i16> @llvm.fptoui.sat.v8f32.v8i16(<8 x float> %f)
     ret <8 x i16> %x
 }
 
 define <16 x i16> @test_unsigned_v16f32_v16i16(<16 x float> %f) {
-; CHECK-LABEL: test_unsigned_v16f32_v16i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    fcvtzu v2.4s, v2.4s
-; CHECK-NEXT:    fcvtzu v4.4s, v1.4s
-; CHECK-NEXT:    uqxtn v0.4h, v0.4s
-; CHECK-NEXT:    uqxtn v1.4h, v2.4s
-; CHECK-NEXT:    fcvtzu v2.4s, v3.4s
-; CHECK-NEXT:    uqxtn2 v0.8h, v4.4s
-; CHECK-NEXT:    uqxtn2 v1.8h, v2.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v16f32_v16i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-SD-NEXT:    fcvtzu v4.4s, v1.4s
+; CHECK-SD-NEXT:    uqxtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    uqxtn v1.4h, v2.4s
+; CHECK-SD-NEXT:    fcvtzu v2.4s, v3.4s
+; CHECK-SD-NEXT:    uqxtn2 v0.8h, v4.4s
+; CHECK-SD-NEXT:    uqxtn2 v1.8h, v2.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v16f32_v16i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v4.2d, #0x00ffff0000ffff
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-GI-NEXT:    fcvtzu v3.4s, v3.4s
+; CHECK-GI-NEXT:    umin v0.4s, v0.4s, v4.4s
+; CHECK-GI-NEXT:    umin v1.4s, v1.4s, v4.4s
+; CHECK-GI-NEXT:    umin v2.4s, v2.4s, v4.4s
+; CHECK-GI-NEXT:    umin v3.4s, v3.4s, v4.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    ret
     %x = call <16 x i16> @llvm.fptoui.sat.v16f32.v16i16(<16 x float> %f)
     ret <16 x i16> %x
 }
@@ -2370,344 +4009,511 @@ define <16 x i16> @test_unsigned_v16f32_v16i16(<16 x float> %f) {
 
 
 define <16 x i8> @test_unsigned_v16f16_v16i8(<16 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v16f16_v16i8:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v3.4s, v1.8h
-; CHECK-CVT-NEXT:    fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT:    fcvtl2 v4.4s, v0.8h
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    movi v2.2d, #0x0000ff000000ff
-; CHECK-CVT-NEXT:    fcvtzu v3.4s, v3.4s
-; CHECK-CVT-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-CVT-NEXT:    fcvtzu v4.4s, v4.4s
-; CHECK-CVT-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT:    umin v3.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT:    umin v1.4s, v1.4s, v2.4s
-; CHECK-CVT-NEXT:    umin v4.4s, v4.4s, v2.4s
-; CHECK-CVT-NEXT:    umin v0.4s, v0.4s, v2.4s
-; CHECK-CVT-NEXT:    uzp1 v1.8h, v1.8h, v3.8h
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v4.8h
-; CHECK-CVT-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v16f16_v16i8:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu v0.8h, v0.8h
-; CHECK-FP16-NEXT:    fcvtzu v1.8h, v1.8h
-; CHECK-FP16-NEXT:    uqxtn v0.8b, v0.8h
-; CHECK-FP16-NEXT:    uqxtn2 v0.16b, v1.8h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_v16f16_v16i8:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl2 v3.4s, v1.8h
+; CHECK-SD-CVT-NEXT:    fcvtl v1.4s, v1.4h
+; CHECK-SD-CVT-NEXT:    fcvtl2 v4.4s, v0.8h
+; CHECK-SD-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    movi v2.2d, #0x0000ff000000ff
+; CHECK-SD-CVT-NEXT:    fcvtzu v3.4s, v3.4s
+; CHECK-SD-CVT-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-SD-CVT-NEXT:    fcvtzu v4.4s, v4.4s
+; CHECK-SD-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-CVT-NEXT:    umin v3.4s, v3.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    umin v1.4s, v1.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    umin v4.4s, v4.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    umin v0.4s, v0.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    uzp1 v1.8h, v1.8h, v3.8h
+; CHECK-SD-CVT-NEXT:    uzp1 v0.8h, v0.8h, v4.8h
+; CHECK-SD-CVT-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_v16f16_v16i8:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    fcvtzu v1.8h, v1.8h
+; CHECK-SD-FP16-NEXT:    uqxtn v0.8b, v0.8h
+; CHECK-SD-FP16-NEXT:    uqxtn2 v0.16b, v1.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v16f16_v16i8:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v3.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-CVT-NEXT:    fcvtl v4.4s, v1.4h
+; CHECK-GI-CVT-NEXT:    fcvtl2 v1.4s, v1.8h
+; CHECK-GI-CVT-NEXT:    movi v2.2d, #0x0000ff000000ff
+; CHECK-GI-CVT-NEXT:    fcvtzu v3.4s, v3.4s
+; CHECK-GI-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    fcvtzu v4.4s, v4.4s
+; CHECK-GI-CVT-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    umin v3.4s, v3.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    umin v0.4s, v0.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    umin v4.4s, v4.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    umin v1.4s, v1.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    uzp1 v0.8h, v3.8h, v0.8h
+; CHECK-GI-CVT-NEXT:    uzp1 v1.8h, v4.8h, v1.8h
+; CHECK-GI-CVT-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v16f16_v16i8:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    movi v2.2d, #0xff00ff00ff00ff
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    fcvtzu v1.8h, v1.8h
+; CHECK-GI-FP16-NEXT:    umin v0.8h, v0.8h, v2.8h
+; CHECK-GI-FP16-NEXT:    umin v1.8h, v1.8h, v2.8h
+; CHECK-GI-FP16-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <16 x i8> @llvm.fptoui.sat.v16f16.v16i8(<16 x half> %f)
     ret <16 x i8> %x
 }
 
 define <16 x i16> @test_unsigned_v16f16_v16i16(<16 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v16f16_v16i16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl v2.4s, v0.4h
-; CHECK-CVT-NEXT:    fcvtl v3.4s, v1.4h
-; CHECK-CVT-NEXT:    fcvtl2 v4.4s, v0.8h
-; CHECK-CVT-NEXT:    fcvtl2 v5.4s, v1.8h
-; CHECK-CVT-NEXT:    fcvtzu v2.4s, v2.4s
-; CHECK-CVT-NEXT:    fcvtzu v1.4s, v3.4s
-; CHECK-CVT-NEXT:    fcvtzu v3.4s, v5.4s
-; CHECK-CVT-NEXT:    uqxtn v0.4h, v2.4s
-; CHECK-CVT-NEXT:    fcvtzu v2.4s, v4.4s
-; CHECK-CVT-NEXT:    uqxtn v1.4h, v1.4s
-; CHECK-CVT-NEXT:    uqxtn2 v0.8h, v2.4s
-; CHECK-CVT-NEXT:    uqxtn2 v1.8h, v3.4s
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v16f16_v16i16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu v0.8h, v0.8h
-; CHECK-FP16-NEXT:    fcvtzu v1.8h, v1.8h
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_v16f16_v16i16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-SD-CVT-NEXT:    fcvtl v3.4s, v1.4h
+; CHECK-SD-CVT-NEXT:    fcvtl2 v4.4s, v0.8h
+; CHECK-SD-CVT-NEXT:    fcvtl2 v5.4s, v1.8h
+; CHECK-SD-CVT-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-SD-CVT-NEXT:    fcvtzu v1.4s, v3.4s
+; CHECK-SD-CVT-NEXT:    fcvtzu v3.4s, v5.4s
+; CHECK-SD-CVT-NEXT:    uqxtn v0.4h, v2.4s
+; CHECK-SD-CVT-NEXT:    fcvtzu v2.4s, v4.4s
+; CHECK-SD-CVT-NEXT:    uqxtn v1.4h, v1.4s
+; CHECK-SD-CVT-NEXT:    uqxtn2 v0.8h, v2.4s
+; CHECK-SD-CVT-NEXT:    uqxtn2 v1.8h, v3.4s
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_v16f16_v16i16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    fcvtzu v1.8h, v1.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v16f16_v16i16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvtl v3.4s, v0.4h
+; CHECK-GI-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-CVT-NEXT:    fcvtl v4.4s, v1.4h
+; CHECK-GI-CVT-NEXT:    fcvtl2 v1.4s, v1.8h
+; CHECK-GI-CVT-NEXT:    movi v2.2d, #0x00ffff0000ffff
+; CHECK-GI-CVT-NEXT:    fcvtzu v3.4s, v3.4s
+; CHECK-GI-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-CVT-NEXT:    fcvtzu v4.4s, v4.4s
+; CHECK-GI-CVT-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-GI-CVT-NEXT:    umin v3.4s, v3.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    umin v0.4s, v0.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    umin v4.4s, v4.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    umin v1.4s, v1.4s, v2.4s
+; CHECK-GI-CVT-NEXT:    uzp1 v0.8h, v3.8h, v0.8h
+; CHECK-GI-CVT-NEXT:    uzp1 v1.8h, v4.8h, v1.8h
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v16f16_v16i16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    fcvtzu v1.8h, v1.8h
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <16 x i16> @llvm.fptoui.sat.v16f16.v16i16(<16 x half> %f)
     ret <16 x i16> %x
 }
 
 define <8 x i8> @test_unsigned_v8f64_v8i8(<8 x double> %f) {
-; CHECK-LABEL: test_unsigned_v8f64_v8i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d4, v3.d[1]
-; CHECK-NEXT:    mov d5, v2.d[1]
-; CHECK-NEXT:    mov w11, #255 // =0xff
-; CHECK-NEXT:    fcvtzu w9, d3
-; CHECK-NEXT:    mov d3, v1.d[1]
-; CHECK-NEXT:    fcvtzu w12, d2
-; CHECK-NEXT:    fcvtzu w14, d1
-; CHECK-NEXT:    fcvtzu w8, d4
-; CHECK-NEXT:    mov d4, v0.d[1]
-; CHECK-NEXT:    fcvtzu w10, d5
-; CHECK-NEXT:    fcvtzu w13, d3
-; CHECK-NEXT:    cmp w8, #255
-; CHECK-NEXT:    fcvtzu w15, d4
-; CHECK-NEXT:    csel w8, w8, w11, lo
-; CHECK-NEXT:    cmp w9, #255
-; CHECK-NEXT:    csel w9, w9, w11, lo
-; CHECK-NEXT:    cmp w10, #255
-; CHECK-NEXT:    fmov s4, w9
-; CHECK-NEXT:    csel w9, w10, w11, lo
-; CHECK-NEXT:    cmp w12, #255
-; CHECK-NEXT:    fcvtzu w10, d0
-; CHECK-NEXT:    mov v4.s[1], w8
-; CHECK-NEXT:    csel w8, w12, w11, lo
-; CHECK-NEXT:    cmp w13, #255
-; CHECK-NEXT:    fmov s3, w8
-; CHECK-NEXT:    csel w8, w13, w11, lo
-; CHECK-NEXT:    cmp w14, #255
-; CHECK-NEXT:    mov v3.s[1], w9
-; CHECK-NEXT:    csel w9, w14, w11, lo
-; CHECK-NEXT:    cmp w15, #255
-; CHECK-NEXT:    fmov s2, w9
-; CHECK-NEXT:    csel w9, w15, w11, lo
-; CHECK-NEXT:    cmp w10, #255
-; CHECK-NEXT:    mov v2.s[1], w8
-; CHECK-NEXT:    csel w8, w10, w11, lo
-; CHECK-NEXT:    fmov s1, w8
-; CHECK-NEXT:    adrp x8, .LCPI82_0
-; CHECK-NEXT:    ldr d0, [x8, :lo12:.LCPI82_0]
-; CHECK-NEXT:    mov v1.s[1], w9
-; CHECK-NEXT:    tbl v0.8b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.8b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v8f64_v8i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d4, v3.d[1]
+; CHECK-SD-NEXT:    mov d5, v2.d[1]
+; CHECK-SD-NEXT:    mov w11, #255 // =0xff
+; CHECK-SD-NEXT:    fcvtzu w9, d3
+; CHECK-SD-NEXT:    mov d3, v1.d[1]
+; CHECK-SD-NEXT:    fcvtzu w12, d2
+; CHECK-SD-NEXT:    fcvtzu w14, d1
+; CHECK-SD-NEXT:    fcvtzu w8, d4
+; CHECK-SD-NEXT:    mov d4, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzu w10, d5
+; CHECK-SD-NEXT:    fcvtzu w13, d3
+; CHECK-SD-NEXT:    cmp w8, #255
+; CHECK-SD-NEXT:    fcvtzu w15, d4
+; CHECK-SD-NEXT:    csel w8, w8, w11, lo
+; CHECK-SD-NEXT:    cmp w9, #255
+; CHECK-SD-NEXT:    csel w9, w9, w11, lo
+; CHECK-SD-NEXT:    cmp w10, #255
+; CHECK-SD-NEXT:    fmov s4, w9
+; CHECK-SD-NEXT:    csel w9, w10, w11, lo
+; CHECK-SD-NEXT:    cmp w12, #255
+; CHECK-SD-NEXT:    fcvtzu w10, d0
+; CHECK-SD-NEXT:    mov v4.s[1], w8
+; CHECK-SD-NEXT:    csel w8, w12, w11, lo
+; CHECK-SD-NEXT:    cmp w13, #255
+; CHECK-SD-NEXT:    fmov s3, w8
+; CHECK-SD-NEXT:    csel w8, w13, w11, lo
+; CHECK-SD-NEXT:    cmp w14, #255
+; CHECK-SD-NEXT:    mov v3.s[1], w9
+; CHECK-SD-NEXT:    csel w9, w14, w11, lo
+; CHECK-SD-NEXT:    cmp w15, #255
+; CHECK-SD-NEXT:    fmov s2, w9
+; CHECK-SD-NEXT:    csel w9, w15, w11, lo
+; CHECK-SD-NEXT:    cmp w10, #255
+; CHECK-SD-NEXT:    mov v2.s[1], w8
+; CHECK-SD-NEXT:    csel w8, w10, w11, lo
+; CHECK-SD-NEXT:    fmov s1, w8
+; CHECK-SD-NEXT:    adrp x8, .LCPI82_0
+; CHECK-SD-NEXT:    ldr d0, [x8, :lo12:.LCPI82_0]
+; CHECK-SD-NEXT:    mov v1.s[1], w9
+; CHECK-SD-NEXT:    tbl v0.8b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v8f64_v8i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v4.2d, #0x000000000000ff
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-GI-NEXT:    cmhi v5.2d, v4.2d, v0.2d
+; CHECK-GI-NEXT:    cmhi v6.2d, v4.2d, v1.2d
+; CHECK-GI-NEXT:    cmhi v7.2d, v4.2d, v2.2d
+; CHECK-GI-NEXT:    cmhi v16.2d, v4.2d, v3.2d
+; CHECK-GI-NEXT:    bif v0.16b, v4.16b, v5.16b
+; CHECK-GI-NEXT:    bif v1.16b, v4.16b, v6.16b
+; CHECK-GI-NEXT:    bif v2.16b, v4.16b, v7.16b
+; CHECK-GI-NEXT:    bif v3.16b, v4.16b, v16.16b
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    xtn v0.8b, v0.8h
+; CHECK-GI-NEXT:    ret
     %x = call <8 x i8> @llvm.fptoui.sat.v8f64.v8i8(<8 x double> %f)
     ret <8 x i8> %x
 }
 
 define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) {
-; CHECK-LABEL: test_unsigned_v16f64_v16i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d16, v0.d[1]
-; CHECK-NEXT:    fcvtzu w10, d0
-; CHECK-NEXT:    mov w8, #255 // =0xff
-; CHECK-NEXT:    fcvtzu w9, d16
-; CHECK-NEXT:    mov d16, v1.d[1]
-; CHECK-NEXT:    cmp w9, #255
-; CHECK-NEXT:    csel w9, w9, w8, lo
-; CHECK-NEXT:    cmp w10, #255
-; CHECK-NEXT:    csel w10, w10, w8, lo
-; CHECK-NEXT:    fmov s0, w10
-; CHECK-NEXT:    fcvtzu w10, d16
-; CHECK-NEXT:    mov d16, v2.d[1]
-; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    fcvtzu w9, d1
-; CHECK-NEXT:    cmp w10, #255
-; CHECK-NEXT:    csel w10, w10, w8, lo
-; CHECK-NEXT:    cmp w9, #255
-; CHECK-NEXT:    mov w11, v0.s[1]
-; CHECK-NEXT:    csel w9, w9, w8, lo
-; CHECK-NEXT:    fmov s1, w9
-; CHECK-NEXT:    fcvtzu w9, d16
-; CHECK-NEXT:    mov d16, v3.d[1]
-; CHECK-NEXT:    mov v0.b[1], w11
-; CHECK-NEXT:    mov v1.s[1], w10
-; CHECK-NEXT:    fcvtzu w10, d2
-; CHECK-NEXT:    cmp w9, #255
-; CHECK-NEXT:    csel w9, w9, w8, lo
-; CHECK-NEXT:    cmp w10, #255
-; CHECK-NEXT:    mov w11, v1.s[1]
-; CHECK-NEXT:    mov v0.b[2], v1.b[0]
-; CHECK-NEXT:    csel w10, w10, w8, lo
-; CHECK-NEXT:    fmov s2, w10
-; CHECK-NEXT:    fcvtzu w10, d16
-; CHECK-NEXT:    mov d16, v4.d[1]
-; CHECK-NEXT:    mov v0.b[3], w11
-; CHECK-NEXT:    mov v2.s[1], w9
-; CHECK-NEXT:    fcvtzu w9, d3
-; CHECK-NEXT:    cmp w10, #255
-; CHECK-NEXT:    csel w10, w10, w8, lo
-; CHECK-NEXT:    cmp w9, #255
-; CHECK-NEXT:    mov w11, v2.s[1]
-; CHECK-NEXT:    mov v0.b[4], v2.b[0]
-; CHECK-NEXT:    csel w9, w9, w8, lo
-; CHECK-NEXT:    fmov s3, w9
-; CHECK-NEXT:    fcvtzu w9, d16
-; CHECK-NEXT:    mov d16, v5.d[1]
-; CHECK-NEXT:    mov v0.b[5], w11
-; CHECK-NEXT:    mov v3.s[1], w10
-; CHECK-NEXT:    fcvtzu w10, d4
-; CHECK-NEXT:    cmp w9, #255
-; CHECK-NEXT:    csel w9, w9, w8, lo
-; CHECK-NEXT:    cmp w10, #255
-; CHECK-NEXT:    mov w11, v3.s[1]
-; CHECK-NEXT:    mov v0.b[6], v3.b[0]
-; CHECK-NEXT:    csel w10, w10, w8, lo
-; CHECK-NEXT:    fmov s4, w10
-; CHECK-NEXT:    fcvtzu w10, d16
-; CHECK-NEXT:    mov v0.b[7], w11
-; CHECK-NEXT:    mov v4.s[1], w9
-; CHECK-NEXT:    fcvtzu w9, d5
-; CHECK-NEXT:    mov d5, v6.d[1]
-; CHECK-NEXT:    cmp w10, #255
-; CHECK-NEXT:    csel w10, w10, w8, lo
-; CHECK-NEXT:    cmp w9, #255
-; CHECK-NEXT:    mov w11, v4.s[1]
-; CHECK-NEXT:    mov v0.b[8], v4.b[0]
-; CHECK-NEXT:    csel w9, w9, w8, lo
-; CHECK-NEXT:    fmov s16, w9
-; CHECK-NEXT:    fcvtzu w9, d5
-; CHECK-NEXT:    mov d5, v7.d[1]
-; CHECK-NEXT:    mov v0.b[9], w11
-; CHECK-NEXT:    mov v16.s[1], w10
-; CHECK-NEXT:    fcvtzu w10, d6
-; CHECK-NEXT:    cmp w9, #255
-; CHECK-NEXT:    csel w9, w9, w8, lo
-; CHECK-NEXT:    cmp w10, #255
-; CHECK-NEXT:    mov v0.b[10], v16.b[0]
-; CHECK-NEXT:    mov w11, v16.s[1]
-; CHECK-NEXT:    csel w10, w10, w8, lo
-; CHECK-NEXT:    fmov s6, w10
-; CHECK-NEXT:    fcvtzu w10, d7
-; CHECK-NEXT:    mov v0.b[11], w11
-; CHECK-NEXT:    mov v6.s[1], w9
-; CHECK-NEXT:    fcvtzu w9, d5
-; CHECK-NEXT:    cmp w9, #255
-; CHECK-NEXT:    mov v0.b[12], v6.b[0]
-; CHECK-NEXT:    mov w11, v6.s[1]
-; CHECK-NEXT:    csel w9, w9, w8, lo
-; CHECK-NEXT:    cmp w10, #255
-; CHECK-NEXT:    csel w8, w10, w8, lo
-; CHECK-NEXT:    fmov s5, w8
-; CHECK-NEXT:    mov v0.b[13], w11
-; CHECK-NEXT:    mov v5.s[1], w9
-; CHECK-NEXT:    mov v0.b[14], v5.b[0]
-; CHECK-NEXT:    mov w8, v5.s[1]
-; CHECK-NEXT:    mov v0.b[15], w8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v16f64_v16i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d16, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzu w10, d0
+; CHECK-SD-NEXT:    mov w8, #255 // =0xff
+; CHECK-SD-NEXT:    fcvtzu w9, d16
+; CHECK-SD-NEXT:    mov d16, v1.d[1]
+; CHECK-SD-NEXT:    cmp w9, #255
+; CHECK-SD-NEXT:    csel w9, w9, w8, lo
+; CHECK-SD-NEXT:    cmp w10, #255
+; CHECK-SD-NEXT:    csel w10, w10, w8, lo
+; CHECK-SD-NEXT:    fmov s0, w10
+; CHECK-SD-NEXT:    fcvtzu w10, d16
+; CHECK-SD-NEXT:    mov d16, v2.d[1]
+; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    fcvtzu w9, d1
+; CHECK-SD-NEXT:    cmp w10, #255
+; CHECK-SD-NEXT:    csel w10, w10, w8, lo
+; CHECK-SD-NEXT:    cmp w9, #255
+; CHECK-SD-NEXT:    mov w11, v0.s[1]
+; CHECK-SD-NEXT:    csel w9, w9, w8, lo
+; CHECK-SD-NEXT:    fmov s1, w9
+; CHECK-SD-NEXT:    fcvtzu w9, d16
+; CHECK-SD-NEXT:    mov d16, v3.d[1]
+; CHECK-SD-NEXT:    mov v0.b[1], w11
+; CHECK-SD-NEXT:    mov v1.s[1], w10
+; CHECK-SD-NEXT:    fcvtzu w10, d2
+; CHECK-SD-NEXT:    cmp w9, #255
+; CHECK-SD-NEXT:    csel w9, w9, w8, lo
+; CHECK-SD-NEXT:    cmp w10, #255
+; CHECK-SD-NEXT:    mov w11, v1.s[1]
+; CHECK-SD-NEXT:    mov v0.b[2], v1.b[0]
+; CHECK-SD-NEXT:    csel w10, w10, w8, lo
+; CHECK-SD-NEXT:    fmov s2, w10
+; CHECK-SD-NEXT:    fcvtzu w10, d16
+; CHECK-SD-NEXT:    mov d16, v4.d[1]
+; CHECK-SD-NEXT:    mov v0.b[3], w11
+; CHECK-SD-NEXT:    mov v2.s[1], w9
+; CHECK-SD-NEXT:    fcvtzu w9, d3
+; CHECK-SD-NEXT:    cmp w10, #255
+; CHECK-SD-NEXT:    csel w10, w10, w8, lo
+; CHECK-SD-NEXT:    cmp w9, #255
+; CHECK-SD-NEXT:    mov w11, v2.s[1]
+; CHECK-SD-NEXT:    mov v0.b[4], v2.b[0]
+; CHECK-SD-NEXT:    csel w9, w9, w8, lo
+; CHECK-SD-NEXT:    fmov s3, w9
+; CHECK-SD-NEXT:    fcvtzu w9, d16
+; CHECK-SD-NEXT:    mov d16, v5.d[1]
+; CHECK-SD-NEXT:    mov v0.b[5], w11
+; CHECK-SD-NEXT:    mov v3.s[1], w10
+; CHECK-SD-NEXT:    fcvtzu w10, d4
+; CHECK-SD-NEXT:    cmp w9, #255
+; CHECK-SD-NEXT:    csel w9, w9, w8, lo
+; CHECK-SD-NEXT:    cmp w10, #255
+; CHECK-SD-NEXT:    mov w11, v3.s[1]
+; CHECK-SD-NEXT:    mov v0.b[6], v3.b[0]
+; CHECK-SD-NEXT:    csel w10, w10, w8, lo
+; CHECK-SD-NEXT:    fmov s4, w10
+; CHECK-SD-NEXT:    fcvtzu w10, d16
+; CHECK-SD-NEXT:    mov v0.b[7], w11
+; CHECK-SD-NEXT:    mov v4.s[1], w9
+; CHECK-SD-NEXT:    fcvtzu w9, d5
+; CHECK-SD-NEXT:    mov d5, v6.d[1]
+; CHECK-SD-NEXT:    cmp w10, #255
+; CHECK-SD-NEXT:    csel w10, w10, w8, lo
+; CHECK-SD-NEXT:    cmp w9, #255
+; CHECK-SD-NEXT:    mov w11, v4.s[1]
+; CHECK-SD-NEXT:    mov v0.b[8], v4.b[0]
+; CHECK-SD-NEXT:    csel w9, w9, w8, lo
+; CHECK-SD-NEXT:    fmov s16, w9
+; CHECK-SD-NEXT:    fcvtzu w9, d5
+; CHECK-SD-NEXT:    mov d5, v7.d[1]
+; CHECK-SD-NEXT:    mov v0.b[9], w11
+; CHECK-SD-NEXT:    mov v16.s[1], w10
+; CHECK-SD-NEXT:    fcvtzu w10, d6
+; CHECK-SD-NEXT:    cmp w9, #255
+; CHECK-SD-NEXT:    csel w9, w9, w8, lo
+; CHECK-SD-NEXT:    cmp w10, #255
+; CHECK-SD-NEXT:    mov v0.b[10], v16.b[0]
+; CHECK-SD-NEXT:    mov w11, v16.s[1]
+; CHECK-SD-NEXT:    csel w10, w10, w8, lo
+; CHECK-SD-NEXT:    fmov s6, w10
+; CHECK-SD-NEXT:    fcvtzu w10, d7
+; CHECK-SD-NEXT:    mov v0.b[11], w11
+; CHECK-SD-NEXT:    mov v6.s[1], w9
+; CHECK-SD-NEXT:    fcvtzu w9, d5
+; CHECK-SD-NEXT:    cmp w9, #255
+; CHECK-SD-NEXT:    mov v0.b[12], v6.b[0]
+; CHECK-SD-NEXT:    mov w11, v6.s[1]
+; CHECK-SD-NEXT:    csel w9, w9, w8, lo
+; CHECK-SD-NEXT:    cmp w10, #255
+; CHECK-SD-NEXT:    csel w8, w10, w8, lo
+; CHECK-SD-NEXT:    fmov s5, w8
+; CHECK-SD-NEXT:    mov v0.b[13], w11
+; CHECK-SD-NEXT:    mov v5.s[1], w9
+; CHECK-SD-NEXT:    mov v0.b[14], v5.b[0]
+; CHECK-SD-NEXT:    mov w8, v5.s[1]
+; CHECK-SD-NEXT:    mov v0.b[15], w8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v16f64_v16i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    movi v16.2d, #0x000000000000ff
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzu v4.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzu v5.2d, v5.2d
+; CHECK-GI-NEXT:    fcvtzu v6.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzu v7.2d, v7.2d
+; CHECK-GI-NEXT:    cmhi v17.2d, v16.2d, v0.2d
+; CHECK-GI-NEXT:    cmhi v18.2d, v16.2d, v1.2d
+; CHECK-GI-NEXT:    cmhi v19.2d, v16.2d, v2.2d
+; CHECK-GI-NEXT:    cmhi v20.2d, v16.2d, v3.2d
+; CHECK-GI-NEXT:    cmhi v21.2d, v16.2d, v4.2d
+; CHECK-GI-NEXT:    cmhi v22.2d, v16.2d, v5.2d
+; CHECK-GI-NEXT:    cmhi v23.2d, v16.2d, v6.2d
+; CHECK-GI-NEXT:    cmhi v24.2d, v16.2d, v7.2d
+; CHECK-GI-NEXT:    bif v0.16b, v16.16b, v17.16b
+; CHECK-GI-NEXT:    bif v1.16b, v16.16b, v18.16b
+; CHECK-GI-NEXT:    bif v2.16b, v16.16b, v19.16b
+; CHECK-GI-NEXT:    bif v3.16b, v16.16b, v20.16b
+; CHECK-GI-NEXT:    bif v4.16b, v16.16b, v21.16b
+; CHECK-GI-NEXT:    bif v5.16b, v16.16b, v22.16b
+; CHECK-GI-NEXT:    bif v6.16b, v16.16b, v23.16b
+; CHECK-GI-NEXT:    bif v7.16b, v16.16b, v24.16b
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-GI-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    ret
     %x = call <16 x i8> @llvm.fptoui.sat.v16f64.v16i8(<16 x double> %f)
     ret <16 x i8> %x
 }
 
 define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) {
-; CHECK-LABEL: test_unsigned_v8f64_v8i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d4, v3.d[1]
-; CHECK-NEXT:    mov d5, v2.d[1]
-; CHECK-NEXT:    mov w10, #65535 // =0xffff
-; CHECK-NEXT:    fcvtzu w9, d3
-; CHECK-NEXT:    mov d3, v1.d[1]
-; CHECK-NEXT:    fcvtzu w12, d2
-; CHECK-NEXT:    fcvtzu w14, d1
-; CHECK-NEXT:    fcvtzu w8, d4
-; CHECK-NEXT:    mov d4, v0.d[1]
-; CHECK-NEXT:    fcvtzu w11, d5
-; CHECK-NEXT:    fcvtzu w13, d3
-; CHECK-NEXT:    cmp w8, w10
-; CHECK-NEXT:    fcvtzu w15, d4
-; CHECK-NEXT:    csel w8, w8, w10, lo
-; CHECK-NEXT:    cmp w9, w10
-; CHECK-NEXT:    csel w9, w9, w10, lo
-; CHECK-NEXT:    cmp w11, w10
-; CHECK-NEXT:    fmov s4, w9
-; CHECK-NEXT:    csel w9, w11, w10, lo
-; CHECK-NEXT:    cmp w12, w10
-; CHECK-NEXT:    fcvtzu w11, d0
-; CHECK-NEXT:    mov v4.s[1], w8
-; CHECK-NEXT:    csel w8, w12, w10, lo
-; CHECK-NEXT:    cmp w13, w10
-; CHECK-NEXT:    fmov s3, w8
-; CHECK-NEXT:    csel w8, w13, w10, lo
-; CHECK-NEXT:    cmp w14, w10
-; CHECK-NEXT:    mov v3.s[1], w9
-; CHECK-NEXT:    csel w9, w14, w10, lo
-; CHECK-NEXT:    cmp w15, w10
-; CHECK-NEXT:    fmov s2, w9
-; CHECK-NEXT:    csel w9, w15, w10, lo
-; CHECK-NEXT:    cmp w11, w10
-; CHECK-NEXT:    mov v2.s[1], w8
-; CHECK-NEXT:    csel w8, w11, w10, lo
-; CHECK-NEXT:    fmov s1, w8
-; CHECK-NEXT:    adrp x8, .LCPI84_0
-; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI84_0]
-; CHECK-NEXT:    mov v1.s[1], w9
-; CHECK-NEXT:    tbl v0.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v8f64_v8i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d4, v3.d[1]
+; CHECK-SD-NEXT:    mov d5, v2.d[1]
+; CHECK-SD-NEXT:    mov w10, #65535 // =0xffff
+; CHECK-SD-NEXT:    fcvtzu w9, d3
+; CHECK-SD-NEXT:    mov d3, v1.d[1]
+; CHECK-SD-NEXT:    fcvtzu w12, d2
+; CHECK-SD-NEXT:    fcvtzu w14, d1
+; CHECK-SD-NEXT:    fcvtzu w8, d4
+; CHECK-SD-NEXT:    mov d4, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzu w11, d5
+; CHECK-SD-NEXT:    fcvtzu w13, d3
+; CHECK-SD-NEXT:    cmp w8, w10
+; CHECK-SD-NEXT:    fcvtzu w15, d4
+; CHECK-SD-NEXT:    csel w8, w8, w10, lo
+; CHECK-SD-NEXT:    cmp w9, w10
+; CHECK-SD-NEXT:    csel w9, w9, w10, lo
+; CHECK-SD-NEXT:    cmp w11, w10
+; CHECK-SD-NEXT:    fmov s4, w9
+; CHECK-SD-NEXT:    csel w9, w11, w10, lo
+; CHECK-SD-NEXT:    cmp w12, w10
+; CHECK-SD-NEXT:    fcvtzu w11, d0
+; CHECK-SD-NEXT:    mov v4.s[1], w8
+; CHECK-SD-NEXT:    csel w8, w12, w10, lo
+; CHECK-SD-NEXT:    cmp w13, w10
+; CHECK-SD-NEXT:    fmov s3, w8
+; CHECK-SD-NEXT:    csel w8, w13, w10, lo
+; CHECK-SD-NEXT:    cmp w14, w10
+; CHECK-SD-NEXT:    mov v3.s[1], w9
+; CHECK-SD-NEXT:    csel w9, w14, w10, lo
+; CHECK-SD-NEXT:    cmp w15, w10
+; CHECK-SD-NEXT:    fmov s2, w9
+; CHECK-SD-NEXT:    csel w9, w15, w10, lo
+; CHECK-SD-NEXT:    cmp w11, w10
+; CHECK-SD-NEXT:    mov v2.s[1], w8
+; CHECK-SD-NEXT:    csel w8, w11, w10, lo
+; CHECK-SD-NEXT:    fmov s1, w8
+; CHECK-SD-NEXT:    adrp x8, .LCPI84_0
+; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI84_0]
+; CHECK-SD-NEXT:    mov v1.s[1], w9
+; CHECK-SD-NEXT:    tbl v0.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v8f64_v8i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v4.2d, #0x0000000000ffff
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-GI-NEXT:    cmhi v5.2d, v4.2d, v0.2d
+; CHECK-GI-NEXT:    cmhi v6.2d, v4.2d, v1.2d
+; CHECK-GI-NEXT:    cmhi v7.2d, v4.2d, v2.2d
+; CHECK-GI-NEXT:    cmhi v16.2d, v4.2d, v3.2d
+; CHECK-GI-NEXT:    bif v0.16b, v4.16b, v5.16b
+; CHECK-GI-NEXT:    bif v1.16b, v4.16b, v6.16b
+; CHECK-GI-NEXT:    bif v2.16b, v4.16b, v7.16b
+; CHECK-GI-NEXT:    bif v3.16b, v4.16b, v16.16b
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
     %x = call <8 x i16> @llvm.fptoui.sat.v8f64.v8i16(<8 x double> %f)
     ret <8 x i16> %x
 }
 
 define <16 x i16> @test_unsigned_v16f64_v16i16(<16 x double> %f) {
-; CHECK-LABEL: test_unsigned_v16f64_v16i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov d16, v3.d[1]
-; CHECK-NEXT:    mov d17, v2.d[1]
-; CHECK-NEXT:    mov w8, #65535 // =0xffff
-; CHECK-NEXT:    fcvtzu w9, d3
-; CHECK-NEXT:    mov d3, v1.d[1]
-; CHECK-NEXT:    fcvtzu w10, d1
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    fcvtzu w11, d2
-; CHECK-NEXT:    fcvtzu w12, d0
-; CHECK-NEXT:    mov d0, v7.d[1]
-; CHECK-NEXT:    mov d2, v6.d[1]
-; CHECK-NEXT:    fcvtzu w14, d7
-; CHECK-NEXT:    fcvtzu w13, d16
-; CHECK-NEXT:    fcvtzu w16, d17
-; CHECK-NEXT:    fcvtzu w15, d6
-; CHECK-NEXT:    fcvtzu w17, d3
-; CHECK-NEXT:    mov d6, v5.d[1]
-; CHECK-NEXT:    mov d3, v4.d[1]
-; CHECK-NEXT:    fcvtzu w18, d1
-; CHECK-NEXT:    cmp w13, w8
-; CHECK-NEXT:    csel w13, w13, w8, lo
-; CHECK-NEXT:    cmp w9, w8
-; CHECK-NEXT:    csel w9, w9, w8, lo
-; CHECK-NEXT:    cmp w16, w8
-; CHECK-NEXT:    fmov s19, w9
-; CHECK-NEXT:    csel w9, w16, w8, lo
-; CHECK-NEXT:    cmp w11, w8
-; CHECK-NEXT:    fcvtzu w16, d0
-; CHECK-NEXT:    csel w11, w11, w8, lo
-; CHECK-NEXT:    cmp w17, w8
-; CHECK-NEXT:    mov v19.s[1], w13
-; CHECK-NEXT:    csel w13, w17, w8, lo
-; CHECK-NEXT:    cmp w10, w8
-; CHECK-NEXT:    csel w10, w10, w8, lo
-; CHECK-NEXT:    cmp w18, w8
-; CHECK-NEXT:    fmov s18, w11
-; CHECK-NEXT:    csel w11, w18, w8, lo
-; CHECK-NEXT:    cmp w12, w8
-; CHECK-NEXT:    fcvtzu w17, d2
-; CHECK-NEXT:    csel w12, w12, w8, lo
-; CHECK-NEXT:    cmp w16, w8
-; CHECK-NEXT:    fcvtzu w18, d6
-; CHECK-NEXT:    mov v18.s[1], w9
-; CHECK-NEXT:    csel w9, w16, w8, lo
-; CHECK-NEXT:    cmp w14, w8
-; CHECK-NEXT:    fmov s17, w10
-; CHECK-NEXT:    csel w10, w14, w8, lo
-; CHECK-NEXT:    fcvtzu w16, d5
-; CHECK-NEXT:    fmov s23, w10
-; CHECK-NEXT:    cmp w17, w8
-; CHECK-NEXT:    fcvtzu w14, d3
-; CHECK-NEXT:    csel w10, w17, w8, lo
-; CHECK-NEXT:    cmp w15, w8
-; CHECK-NEXT:    fcvtzu w17, d4
-; CHECK-NEXT:    mov v17.s[1], w13
-; CHECK-NEXT:    mov v23.s[1], w9
-; CHECK-NEXT:    csel w9, w15, w8, lo
-; CHECK-NEXT:    cmp w18, w8
-; CHECK-NEXT:    fmov s22, w9
-; CHECK-NEXT:    csel w9, w18, w8, lo
-; CHECK-NEXT:    cmp w16, w8
-; CHECK-NEXT:    fmov s16, w12
-; CHECK-NEXT:    mov v22.s[1], w10
-; CHECK-NEXT:    csel w10, w16, w8, lo
-; CHECK-NEXT:    cmp w14, w8
-; CHECK-NEXT:    fmov s21, w10
-; CHECK-NEXT:    csel w10, w14, w8, lo
-; CHECK-NEXT:    cmp w17, w8
-; CHECK-NEXT:    csel w8, w17, w8, lo
-; CHECK-NEXT:    mov v16.s[1], w11
-; CHECK-NEXT:    mov v21.s[1], w9
-; CHECK-NEXT:    fmov s20, w8
-; CHECK-NEXT:    adrp x8, .LCPI85_0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI85_0]
-; CHECK-NEXT:    mov v20.s[1], w10
-; CHECK-NEXT:    tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
-; CHECK-NEXT:    tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v16f64_v16i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov d16, v3.d[1]
+; CHECK-SD-NEXT:    mov d17, v2.d[1]
+; CHECK-SD-NEXT:    mov w8, #65535 // =0xffff
+; CHECK-SD-NEXT:    fcvtzu w9, d3
+; CHECK-SD-NEXT:    mov d3, v1.d[1]
+; CHECK-SD-NEXT:    fcvtzu w10, d1
+; CHECK-SD-NEXT:    mov d1, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzu w11, d2
+; CHECK-SD-NEXT:    fcvtzu w12, d0
+; CHECK-SD-NEXT:    mov d0, v7.d[1]
+; CHECK-SD-NEXT:    mov d2, v6.d[1]
+; CHECK-SD-NEXT:    fcvtzu w14, d7
+; CHECK-SD-NEXT:    fcvtzu w13, d16
+; CHECK-SD-NEXT:    fcvtzu w16, d17
+; CHECK-SD-NEXT:    fcvtzu w15, d6
+; CHECK-SD-NEXT:    fcvtzu w17, d3
+; CHECK-SD-NEXT:    mov d6, v5.d[1]
+; CHECK-SD-NEXT:    mov d3, v4.d[1]
+; CHECK-SD-NEXT:    fcvtzu w18, d1
+; CHECK-SD-NEXT:    cmp w13, w8
+; CHECK-SD-NEXT:    csel w13, w13, w8, lo
+; CHECK-SD-NEXT:    cmp w9, w8
+; CHECK-SD-NEXT:    csel w9, w9, w8, lo
+; CHECK-SD-NEXT:    cmp w16, w8
+; CHECK-SD-NEXT:    fmov s19, w9
+; CHECK-SD-NEXT:    csel w9, w16, w8, lo
+; CHECK-SD-NEXT:    cmp w11, w8
+; CHECK-SD-NEXT:    fcvtzu w16, d0
+; CHECK-SD-NEXT:    csel w11, w11, w8, lo
+; CHECK-SD-NEXT:    cmp w17, w8
+; CHECK-SD-NEXT:    mov v19.s[1], w13
+; CHECK-SD-NEXT:    csel w13, w17, w8, lo
+; CHECK-SD-NEXT:    cmp w10, w8
+; CHECK-SD-NEXT:    csel w10, w10, w8, lo
+; CHECK-SD-NEXT:    cmp w18, w8
+; CHECK-SD-NEXT:    fmov s18, w11
+; CHECK-SD-NEXT:    csel w11, w18, w8, lo
+; CHECK-SD-NEXT:    cmp w12, w8
+; CHECK-SD-NEXT:    fcvtzu w17, d2
+; CHECK-SD-NEXT:    csel w12, w12, w8, lo
+; CHECK-SD-NEXT:    cmp w16, w8
+; CHECK-SD-NEXT:    fcvtzu w18, d6
+; CHECK-SD-NEXT:    mov v18.s[1], w9
+; CHECK-SD-NEXT:    csel w9, w16, w8, lo
+; CHECK-SD-NEXT:    cmp w14, w8
+; CHECK-SD-NEXT:    fmov s17, w10
+; CHECK-SD-NEXT:    csel w10, w14, w8, lo
+; CHECK-SD-NEXT:    fcvtzu w16, d5
+; CHECK-SD-NEXT:    fmov s23, w10
+; CHECK-SD-NEXT:    cmp w17, w8
+; CHECK-SD-NEXT:    fcvtzu w14, d3
+; CHECK-SD-NEXT:    csel w10, w17, w8, lo
+; CHECK-SD-NEXT:    cmp w15, w8
+; CHECK-SD-NEXT:    fcvtzu w17, d4
+; CHECK-SD-NEXT:    mov v17.s[1], w13
+; CHECK-SD-NEXT:    mov v23.s[1], w9
+; CHECK-SD-NEXT:    csel w9, w15, w8, lo
+; CHECK-SD-NEXT:    cmp w18, w8
+; CHECK-SD-NEXT:    fmov s22, w9
+; CHECK-SD-NEXT:    csel w9, w18, w8, lo
+; CHECK-SD-NEXT:    cmp w16, w8
+; CHECK-SD-NEXT:    fmov s16, w12
+; CHECK-SD-NEXT:    mov v22.s[1], w10
+; CHECK-SD-NEXT:    csel w10, w16, w8, lo
+; CHECK-SD-NEXT:    cmp w14, w8
+; CHECK-SD-NEXT:    fmov s21, w10
+; CHECK-SD-NEXT:    csel w10, w14, w8, lo
+; CHECK-SD-NEXT:    cmp w17, w8
+; CHECK-SD-NEXT:    csel w8, w17, w8, lo
+; CHECK-SD-NEXT:    mov v16.s[1], w11
+; CHECK-SD-NEXT:    mov v21.s[1], w9
+; CHECK-SD-NEXT:    fmov s20, w8
+; CHECK-SD-NEXT:    adrp x8, .LCPI85_0
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI85_0]
+; CHECK-SD-NEXT:    mov v20.s[1], w10
+; CHECK-SD-NEXT:    tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
+; CHECK-SD-NEXT:    tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_v16f64_v16i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    movi v16.2d, #0x0000000000ffff
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzu v4.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzu v5.2d, v5.2d
+; CHECK-GI-NEXT:    fcvtzu v6.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzu v7.2d, v7.2d
+; CHECK-GI-NEXT:    cmhi v17.2d, v16.2d, v0.2d
+; CHECK-GI-NEXT:    cmhi v18.2d, v16.2d, v1.2d
+; CHECK-GI-NEXT:    cmhi v19.2d, v16.2d, v2.2d
+; CHECK-GI-NEXT:    cmhi v20.2d, v16.2d, v3.2d
+; CHECK-GI-NEXT:    cmhi v21.2d, v16.2d, v4.2d
+; CHECK-GI-NEXT:    cmhi v22.2d, v16.2d, v5.2d
+; CHECK-GI-NEXT:    cmhi v23.2d, v16.2d, v6.2d
+; CHECK-GI-NEXT:    cmhi v24.2d, v16.2d, v7.2d
+; CHECK-GI-NEXT:    bif v0.16b, v16.16b, v17.16b
+; CHECK-GI-NEXT:    bif v1.16b, v16.16b, v18.16b
+; CHECK-GI-NEXT:    bif v2.16b, v16.16b, v19.16b
+; CHECK-GI-NEXT:    bif v3.16b, v16.16b, v20.16b
+; CHECK-GI-NEXT:    bif v4.16b, v16.16b, v21.16b
+; CHECK-GI-NEXT:    bif v5.16b, v16.16b, v22.16b
+; CHECK-GI-NEXT:    bif v6.16b, v16.16b, v23.16b
+; CHECK-GI-NEXT:    bif v7.16b, v16.16b, v24.16b
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-GI-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    ret
     %x = call <16 x i16> @llvm.fptoui.sat.v16f64.v16i16(<16 x double> %f)
     ret <16 x i16> %x
 }
diff --git a/llvm/test/TableGen/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter.td
index 853831366fa531..b9aea33ac96aaa 100644
--- a/llvm/test/TableGen/GlobalISelEmitter.td
+++ b/llvm/test/TableGen/GlobalISelEmitter.td
@@ -513,7 +513,7 @@ def : Pat<(frag GPR32:$src1, complex:$src2, complex:$src3),
 // R00O-NEXT:  GIM_Reject,
 // R00O:       // Label [[DEFAULT_NUM]]: @[[DEFAULT]]
 // R00O-NEXT:  GIM_Reject,
-// R00O-NEXT:  }; // Size: 1816 bytes
+// R00O-NEXT:  }; // Size: 1824 bytes
 
 def INSNBOB : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2, GPR32:$src3, GPR32:$src4),
                  [(set GPR32:$dst,



More information about the llvm-commits mailing list