[llvm] [GlobalISel][AArch64] Add G_FPTOSI_SAT/G_FPTOUI_SAT (PR #96297)

David Green via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 6 09:11:18 PDT 2024


https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/96297

>From 9a2711e5b99db14ba06d22f32b069b9d3c050058 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 21 Jun 2024 12:00:35 +0100
Subject: [PATCH 1/3] [GlobalISel][AArch64] Add G_FPTOSI_SAT/G_FPTOUI_SAT

This is an implementation of the saturating fp to int conversions for
GlobalISel. On AArch64 the conversion instructions work this way, producing
saturating results.  LegalizerHelper::lowerFPTOINT_SAT is ported from SDAG.

AArch64 has a lot of existing tests for fptosi_sat, covering a wide range of
types. I have tried to make most of them work all at once, but a few fall back
due to other missing features such as f128 handling for min/max.
---
 llvm/docs/GlobalISel/GenericOpcode.rst        |   5 +
 .../CodeGen/GlobalISel/GenericMachineInstrs.h |   2 +
 .../llvm/CodeGen/GlobalISel/LegalizerHelper.h |   1 +
 .../CodeGen/GlobalISel/MachineIRBuilder.h     |  10 +
 llvm/include/llvm/Support/TargetOpcodes.def   |   6 +
 llvm/include/llvm/Target/GenericOpcodes.td    |  12 +
 .../Target/GlobalISel/SelectionDAGCompat.td   |   2 +
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  |   8 +
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    | 147 +++
 .../GISel/AArch64InstructionSelector.cpp      |   6 +
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    |  49 +
 .../AArch64/GISel/AArch64RegisterBankInfo.cpp |   4 +
 .../GlobalISel/legalizer-info-validation.mir  |   7 +
 .../test/CodeGen/AArch64/fptosi-sat-scalar.ll | 974 ++++++++++++------
 .../test/CodeGen/AArch64/fptoui-sat-scalar.ll | 712 ++++++++-----
 llvm/test/TableGen/GlobalISelEmitter.td       |   2 +-
 16 files changed, 1387 insertions(+), 560 deletions(-)

diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst
index d32aeff5a69bb1..4c9a807c598c02 100644
--- a/llvm/docs/GlobalISel/GenericOpcode.rst
+++ b/llvm/docs/GlobalISel/GenericOpcode.rst
@@ -504,6 +504,11 @@ G_FPTOSI, G_FPTOUI, G_SITOFP, G_UITOFP
 
 Convert between integer and floating point.
 
+G_FPTOSI_SAT, G_FPTOUI_SAT
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Saturating conversion from floating point to signed or unsigned integer.
+
 G_FABS
 ^^^^^^
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index ef1171d9f1f64d..b7c545e5136f43 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -823,6 +823,8 @@ class GCastOp : public GenericMachineInstr {
     case TargetOpcode::G_FPEXT:
     case TargetOpcode::G_FPTOSI:
     case TargetOpcode::G_FPTOUI:
+    case TargetOpcode::G_FPTOSI_SAT:
+    case TargetOpcode::G_FPTOUI_SAT:
     case TargetOpcode::G_FPTRUNC:
     case TargetOpcode::G_INTTOPTR:
     case TargetOpcode::G_PTRTOINT:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index afd68250f5ca6e..5360850deeffd8 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -398,6 +398,7 @@ class LegalizerHelper {
   LegalizeResult lowerSITOFP(MachineInstr &MI);
   LegalizeResult lowerFPTOUI(MachineInstr &MI);
   LegalizeResult lowerFPTOSI(MachineInstr &MI);
+  LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI);
 
   LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI);
   LegalizeResult lowerFPTRUNC(MachineInstr &MI);
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 56a77b8596a18b..e02a967988a575 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -2000,6 +2000,16 @@ class MachineIRBuilder {
     return buildInstr(TargetOpcode::G_FPTOSI, {Dst}, {Src0});
   }
 
+  /// Build and insert \p Dst = G_FPTOUI_SAT \p Src0
+  MachineInstrBuilder buildFPTOUI_SAT(const DstOp &Dst, const SrcOp &Src0) {
+    return buildInstr(TargetOpcode::G_FPTOUI_SAT, {Dst}, {Src0});
+  }
+
+  /// Build and insert \p Dst = G_FPTOSI_SAT \p Src0
+  MachineInstrBuilder buildFPTOSI_SAT(const DstOp &Dst, const SrcOp &Src0) {
+    return buildInstr(TargetOpcode::G_FPTOSI_SAT, {Dst}, {Src0});
+  }
+
   /// Build and insert \p Dst = G_INTRINSIC_ROUNDEVEN \p Src0, \p Src1
   MachineInstrBuilder
   buildIntrinsicRoundeven(const DstOp &Dst, const SrcOp &Src0,
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index e1883de0c93b4c..cd3d7c2e11f922 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -682,6 +682,12 @@ HANDLE_TARGET_OPCODE(G_SITOFP)
 /// Generic unsigned-int to float conversion
 HANDLE_TARGET_OPCODE(G_UITOFP)
 
+/// Generic saturating float to signed-int conversion
+HANDLE_TARGET_OPCODE(G_FPTOSI_SAT)
+
+/// Generic saturating float to unsigned-int conversion
+HANDLE_TARGET_OPCODE(G_FPTOUI_SAT)
+
 /// Generic FP absolute value.
 HANDLE_TARGET_OPCODE(G_FABS)
 
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 36a0a087ba457c..f5570db373e4a7 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -769,6 +769,18 @@ def G_UITOFP : GenericInstruction {
   let hasSideEffects = false;
 }
 
+def G_FPTOSI_SAT : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type1:$src);
+  let hasSideEffects = false;
+}
+
+def G_FPTOUI_SAT : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type1:$src);
+  let hasSideEffects = false;
+}
+
 def G_FABS : GenericInstruction {
   let OutOperandList = (outs type0:$dst);
   let InOperandList = (ins type0:$src);
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 72d155b483cf2b..627c57429941ea 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -98,6 +98,8 @@ def : GINodeEquiv<G_FPTOSI, fp_to_sint>;
 def : GINodeEquiv<G_FPTOUI, fp_to_uint>;
 def : GINodeEquiv<G_SITOFP, sint_to_fp>;
 def : GINodeEquiv<G_UITOFP, uint_to_fp>;
+def : GINodeEquiv<G_FPTOSI_SAT, fp_to_sint_sat>;
+def : GINodeEquiv<G_FPTOUI_SAT, fp_to_uint_sat>;
 def : GINodeEquiv<G_FADD, fadd>;
 def : GINodeEquiv<G_FSUB, fsub>;
 def : GINodeEquiv<G_FMA, fma>;
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index b290d7fb4ce4a1..99267fab832b80 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2340,6 +2340,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                            MachineInstr::copyFlagsFromInstruction(CI));
     return true;
   }
+  case Intrinsic::fptosi_sat:
+    MIRBuilder.buildFPTOSI_SAT(getOrCreateVReg(CI),
+                               getOrCreateVReg(*CI.getArgOperand(0)));
+    return true;
+  case Intrinsic::fptoui_sat:
+    MIRBuilder.buildFPTOUI_SAT(getOrCreateVReg(CI),
+                               getOrCreateVReg(*CI.getArgOperand(0)));
+    return true;
   case Intrinsic::memcpy_inline:
     return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE);
   case Intrinsic::memcpy:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3640b77ff4068c..44d28fff47a2b4 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1880,6 +1880,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
   }
   case TargetOpcode::G_FPTOUI:
   case TargetOpcode::G_FPTOSI:
+  case TargetOpcode::G_FPTOUI_SAT:
+  case TargetOpcode::G_FPTOSI_SAT:
     return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
   case TargetOpcode::G_FPEXT:
     if (TypeIdx != 0)
@@ -2872,6 +2874,47 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
     else
       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
 
+    Observer.changedInstr(MI);
+    return Legalized;
+  case TargetOpcode::G_FPTOSI_SAT:
+  case TargetOpcode::G_FPTOUI_SAT:
+    Observer.changingInstr(MI);
+
+    if (TypeIdx == 0) {
+      Register OldDst = MI.getOperand(0).getReg();
+      LLT Ty = MRI.getType(OldDst);
+      Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
+      Register NewDst;
+      MI.getOperand(0).setReg(ExtReg);
+      uint64_t ShortBits = Ty.getScalarSizeInBits();
+      uint64_t WideBits = WideTy.getScalarSizeInBits();
+      MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+      if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
+        // z = i16 fptosi_sat(a)
+        // ->
+        // x = i32 fptosi_sat(a)
+        // y = smin(x, 32767)
+        // z = smax(y, -32768)
+        auto MaxVal = MIRBuilder.buildConstant(
+            WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
+        auto MinVal = MIRBuilder.buildConstant(
+            WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
+        Register MidReg =
+            MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
+        NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
+      } else {
+        // z = i16 fptoui_sat(a)
+        // ->
+        // x = i32 fptoui_sat(a)
+        // y = umin(x, 65535)
+        auto MaxVal = MIRBuilder.buildConstant(
+            WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
+        NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
+      }
+      MIRBuilder.buildTrunc(OldDst, NewDst);
+    } else
+      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+
     Observer.changedInstr(MI);
     return Legalized;
   case TargetOpcode::G_LOAD:
@@ -4170,6 +4213,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
     return lowerFPTOUI(MI);
   case G_FPTOSI:
     return lowerFPTOSI(MI);
+  case G_FPTOUI_SAT:
+  case G_FPTOSI_SAT:
+    return lowerFPTOINT_SAT(MI);
   case G_FPTRUNC:
     return lowerFPTRUNC(MI);
   case G_FPOWI:
@@ -4986,6 +5032,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case G_UITOFP:
   case G_FPTOSI:
   case G_FPTOUI:
+  case G_FPTOSI_SAT:
+  case G_FPTOUI_SAT:
   case G_INTTOPTR:
   case G_PTRTOINT:
   case G_ADDRSPACE_CAST:
@@ -5777,6 +5825,8 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case TargetOpcode::G_FPEXT:
   case TargetOpcode::G_FPTOSI:
   case TargetOpcode::G_FPTOUI:
+  case TargetOpcode::G_FPTOSI_SAT:
+  case TargetOpcode::G_FPTOUI_SAT:
   case TargetOpcode::G_SITOFP:
   case TargetOpcode::G_UITOFP: {
     Observer.changingInstr(MI);
@@ -7285,6 +7335,103 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
   return Legalized;
 }
 
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
+  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+
+  bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
+  unsigned SatWidth = DstTy.getScalarSizeInBits();
+
+  // Determine minimum and maximum integer values and their corresponding
+  // floating-point values.
+  APInt MinInt, MaxInt;
+  if (IsSigned) {
+    MinInt = APInt::getSignedMinValue(SatWidth);
+    MaxInt = APInt::getSignedMaxValue(SatWidth);
+  } else {
+    MinInt = APInt::getMinValue(SatWidth);
+    MaxInt = APInt::getMaxValue(SatWidth);
+  }
+
+  const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
+  APFloat MinFloat(Semantics);
+  APFloat MaxFloat(Semantics);
+
+  APFloat::opStatus MinStatus =
+      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
+  APFloat::opStatus MaxStatus =
+      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
+  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
+                             !(MaxStatus & APFloat::opStatus::opInexact);
+
+  // If the integer bounds are exactly representable as floats and min/max are
+  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
+  // of comparisons and selects.
+  bool MinMaxLegal = LI.isLegal({TargetOpcode::G_FMINNUM, SrcTy}) &&
+                     LI.isLegal({TargetOpcode::G_FMAXNUM, SrcTy});
+  if (AreExactFloatBounds && MinMaxLegal) {
+    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
+    auto Max = MIRBuilder.buildFMaxNum(
+        SrcTy, Src, MIRBuilder.buildFConstant(SrcTy, MinFloat));
+    // Clamp by MaxFloat from above. NaN cannot occur.
+    auto Min = MIRBuilder.buildFMinNum(
+        SrcTy, Max, MIRBuilder.buildFConstant(SrcTy, MaxFloat),
+        MachineInstr::FmNoNans);
+    // Convert clamped value to integer. In the unsigned case we're done,
+    // because we mapped NaN to MinFloat, which will cast to zero.
+    if (!IsSigned) {
+      MIRBuilder.buildFPTOUI(Dst, Min);
+      MI.eraseFromParent();
+      return Legalized;
+    }
+
+    // Otherwise, select 0 if Src is NaN.
+    auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
+    auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
+                                       DstTy.changeElementSize(1), Src, Src);
+    MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
+                           FpToInt);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  // Result of direct conversion. The assumption here is that the operation is
+  // non-trapping and it's fine to apply it to an out-of-range value if we
+  // select it away later.
+  auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
+                          : MIRBuilder.buildFPTOUI(DstTy, Src);
+
+  // If Src ULT MinFloat, select MinInt. In particular, this also selects
+  // MinInt if Src is NaN.
+  auto ULT =
+      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
+                           MIRBuilder.buildFConstant(SrcTy, MinFloat));
+  auto Max = MIRBuilder.buildSelect(
+      DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
+  // If Src OGT MaxFloat, select MaxInt.
+  auto OGT =
+      MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
+                           MIRBuilder.buildFConstant(SrcTy, MaxFloat));
+
+  // In the unsigned case we are done, because we mapped NaN to MinInt, which
+  // is already zero.
+  if (!IsSigned) {
+    MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
+                           Max, MachineInstr::FmNoNans);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  // Otherwise, select 0 if Src is NaN.
+  auto Min = MIRBuilder.buildSelect(
+      DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
+  auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
+                                     DstTy.changeElementSize(1), Src, Src);
+  MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
+  MI.eraseFromParent();
+  return Legalized;
+}
+
 // f64 -> f16 conversion using round-to-nearest-even rounding mode.
 LegalizerHelper::LegalizeResult
 LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 18361cf3685642..accadfd352f30e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2152,6 +2152,12 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
     }
     return false;
   }
+  case TargetOpcode::G_FPTOSI_SAT:
+    I.setDesc(TII.get(TargetOpcode::G_FPTOSI));
+    return true;
+  case TargetOpcode::G_FPTOUI_SAT:
+    I.setDesc(TII.get(TargetOpcode::G_FPTOUI));
+    return true;
   default:
     return false;
   }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index db5cd1d32d73d0..b0c1215bd5514a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -728,6 +728,55 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .libcallFor(
           {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
 
+  getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
+      .legalFor({{s32, s32},
+                 {s64, s32},
+                 {s32, s64},
+                 {s64, s64},
+                 {v2s64, v2s64},
+                 {v4s32, v4s32},
+                 {v2s32, v2s32}})
+      .legalIf([=](const LegalityQuery &Query) {
+        return HasFP16 &&
+               (Query.Types[1] == s16 || Query.Types[1] == v4s16 ||
+                Query.Types[1] == v8s16) &&
+               (Query.Types[0] == s32 || Query.Types[0] == s64 ||
+                Query.Types[0] == v4s16 || Query.Types[0] == v8s16);
+      })
+      // Handle types larger than i64 by scalarizing/lowering.
+      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
+      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
+      // The range of a fp16 value fits into an i17, so we can lower the width
+      // to i64.
+      .narrowScalarIf(
+          [=](const LegalityQuery &Query) {
+            return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
+          },
+          changeTo(0, s64))
+      .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
+      .moreElementsToNextPow2(0)
+      .widenScalarToNextPow2(0, /*MinSize=*/32)
+      .minScalar(0, s32)
+      .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
+      .widenScalarIf(
+          [=](const LegalityQuery &Query) {
+            unsigned ITySize = Query.Types[0].getScalarSizeInBits();
+            return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
+                   ITySize > Query.Types[1].getScalarSizeInBits();
+          },
+          LegalizeMutations::changeElementSizeTo(1, 0))
+      .widenScalarIf(
+          [=](const LegalityQuery &Query) {
+            unsigned FTySize = Query.Types[1].getScalarSizeInBits();
+            return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
+                   Query.Types[0].getScalarSizeInBits() < FTySize;
+          },
+          LegalizeMutations::changeElementSizeTo(0, 1))
+      .widenScalarOrEltToNextPow2(0)
+      .clampNumElements(0, v4s16, v8s16)
+      .clampNumElements(0, v2s32, v4s32)
+      .clampMaxNumElements(0, s64, 2);
+
   getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
       .legalFor({{s32, s32},
                  {s64, s32},
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 23e135063147a1..8d63c36eb015f3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -535,6 +535,8 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
   switch (MI.getOpcode()) {
   case TargetOpcode::G_FPTOSI:
   case TargetOpcode::G_FPTOUI:
+  case TargetOpcode::G_FPTOSI_SAT:
+  case TargetOpcode::G_FPTOUI_SAT:
   case TargetOpcode::G_FCMP:
   case TargetOpcode::G_LROUND:
   case TargetOpcode::G_LLROUND:
@@ -799,6 +801,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   }
   case TargetOpcode::G_FPTOSI:
   case TargetOpcode::G_FPTOUI:
+  case TargetOpcode::G_FPTOSI_SAT:
+  case TargetOpcode::G_FPTOUI_SAT:
   case TargetOpcode::G_INTRINSIC_LRINT:
   case TargetOpcode::G_INTRINSIC_LLRINT:
     if (MRI.getType(MI.getOperand(0).getReg()).isVector())
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index b3b85090d11251..ddae45d96f6a78 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -538,6 +538,13 @@
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_FPTOSI_SAT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_FPTOUI_SAT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_FABS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index eeb1504d8dc77b..588cfca431efe8 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
+; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
 
 ;
 ; 32-bit float to signed integer
@@ -18,13 +20,23 @@ declare i100 @llvm.fptosi.sat.i100.f32(float)
 declare i128 @llvm.fptosi.sat.i128.f32(float)
 
 define i1 @test_signed_i1_f32(float %f) nounwind {
-; CHECK-LABEL: test_signed_i1_f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs w8, s0
-; CHECK-NEXT:    ands w8, w8, w8, asr #31
-; CHECK-NEXT:    csinv w8, w8, wzr, ge
-; CHECK-NEXT:    and w0, w8, #0x1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i1_f32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs w8, s0
+; CHECK-SD-NEXT:    ands w8, w8, w8, asr #31
+; CHECK-SD-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-SD-NEXT:    and w0, w8, #0x1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_i1_f32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs w8, s0
+; CHECK-GI-NEXT:    cmp w8, #0
+; CHECK-GI-NEXT:    csel w8, w8, wzr, lt
+; CHECK-GI-NEXT:    cmp w8, #0
+; CHECK-GI-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-GI-NEXT:    and w0, w8, #0x1
+; CHECK-GI-NEXT:    ret
     %x = call i1 @llvm.fptosi.sat.i1.f32(float %f)
     ret i1 %x
 }
@@ -99,16 +111,27 @@ define i32 @test_signed_i32_f32(float %f) nounwind {
 }
 
 define i50 @test_signed_i50_f32(float %f) nounwind {
-; CHECK-LABEL: test_signed_i50_f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs x8, s0
-; CHECK-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
-; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    csel x8, x8, x9, lt
-; CHECK-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
-; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    csel x0, x8, x9, gt
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i50_f32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs x8, s0
+; CHECK-SD-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-SD-NEXT:    cmp x8, x9
+; CHECK-SD-NEXT:    csel x8, x8, x9, lt
+; CHECK-SD-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
+; CHECK-SD-NEXT:    cmp x8, x9
+; CHECK-SD-NEXT:    csel x0, x8, x9, gt
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_i50_f32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs x8, s0
+; CHECK-GI-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-GI-NEXT:    mov x10, #-562949953421312 // =0xfffe000000000000
+; CHECK-GI-NEXT:    cmp x8, x9
+; CHECK-GI-NEXT:    csel x8, x8, x9, lt
+; CHECK-GI-NEXT:    cmp x8, x10
+; CHECK-GI-NEXT:    csel x0, x8, x10, gt
+; CHECK-GI-NEXT:    ret
     %x = call i50 @llvm.fptosi.sat.i50.f32(float %f)
     ret i50 %x
 }
@@ -123,57 +146,105 @@ define i64 @test_signed_i64_f32(float %f) nounwind {
 }
 
 define i100 @test_signed_i100_f32(float %f) nounwind {
-; CHECK-LABEL: test_signed_i100_f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov s8, s0
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    movi v0.2s, #241, lsl #24
-; CHECK-NEXT:    mov w8, #1895825407 // =0x70ffffff
-; CHECK-NEXT:    mov x10, #34359738367 // =0x7ffffffff
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov x8, #-34359738368 // =0xfffffff800000000
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    csel x8, x8, x1, lt
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    csel x8, x10, x8, gt
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    csel x0, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i100_f32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s8, s0
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    movi v0.2s, #241, lsl #24
+; CHECK-SD-NEXT:    mov w8, #1895825407 // =0x70ffffff
+; CHECK-SD-NEXT:    mov x10, #34359738367 // =0x7ffffffff
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov x8, #-34359738368 // =0xfffffff800000000
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x8, x8, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    csel x8, x10, x8, gt
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    csel x0, xzr, x9, vs
+; CHECK-SD-NEXT:    csel x1, xzr, x8, vs
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_i100_f32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s8, s0
+; CHECK-GI-NEXT:    bl __fixsfti
+; CHECK-GI-NEXT:    movi v0.2s, #241, lsl #24
+; CHECK-GI-NEXT:    mov w8, #1895825407 // =0x70ffffff
+; CHECK-GI-NEXT:    mov x10, #34359738367 // =0x7ffffffff
+; CHECK-GI-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fcmp s8, s0
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov x8, #34359738368 // =0x800000000
+; CHECK-GI-NEXT:    csel x9, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x8, x8, x1, lt
+; CHECK-GI-NEXT:    fcmp s8, s0
+; CHECK-GI-NEXT:    csinv x9, x9, xzr, le
+; CHECK-GI-NEXT:    csel x8, x10, x8, gt
+; CHECK-GI-NEXT:    fcmp s8, s8
+; CHECK-GI-NEXT:    csel x0, xzr, x9, vs
+; CHECK-GI-NEXT:    csel x1, xzr, x8, vs
+; CHECK-GI-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
     %x = call i100 @llvm.fptosi.sat.i100.f32(float %f)
     ret i100 %x
 }
 
 define i128 @test_signed_i128_f32(float %f) nounwind {
-; CHECK-LABEL: test_signed_i128_f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov s8, s0
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    movi v0.2s, #255, lsl #24
-; CHECK-NEXT:    mov w8, #2130706431 // =0x7effffff
-; CHECK-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    csel x8, x8, x1, lt
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    csel x8, x10, x8, gt
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    csel x0, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i128_f32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s8, s0
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    movi v0.2s, #255, lsl #24
+; CHECK-SD-NEXT:    mov w8, #2130706431 // =0x7effffff
+; CHECK-SD-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x8, x8, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    csel x8, x10, x8, gt
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    csel x0, xzr, x9, vs
+; CHECK-SD-NEXT:    csel x1, xzr, x8, vs
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_i128_f32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s8, s0
+; CHECK-GI-NEXT:    bl __fixsfti
+; CHECK-GI-NEXT:    movi v0.2s, #255, lsl #24
+; CHECK-GI-NEXT:    mov w8, #2130706431 // =0x7effffff
+; CHECK-GI-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-GI-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fcmp s8, s0
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-GI-NEXT:    csel x9, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x8, x8, x1, lt
+; CHECK-GI-NEXT:    fcmp s8, s0
+; CHECK-GI-NEXT:    csinv x9, x9, xzr, le
+; CHECK-GI-NEXT:    csel x8, x10, x8, gt
+; CHECK-GI-NEXT:    fcmp s8, s8
+; CHECK-GI-NEXT:    csel x0, xzr, x9, vs
+; CHECK-GI-NEXT:    csel x1, xzr, x8, vs
+; CHECK-GI-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
     %x = call i128 @llvm.fptosi.sat.i128.f32(float %f)
     ret i128 %x
 }
@@ -194,13 +265,23 @@ declare i100 @llvm.fptosi.sat.i100.f64(double)
 declare i128 @llvm.fptosi.sat.i128.f64(double)
 
 define i1 @test_signed_i1_f64(double %f) nounwind {
-; CHECK-LABEL: test_signed_i1_f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs w8, d0
-; CHECK-NEXT:    ands w8, w8, w8, asr #31
-; CHECK-NEXT:    csinv w8, w8, wzr, ge
-; CHECK-NEXT:    and w0, w8, #0x1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i1_f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs w8, d0
+; CHECK-SD-NEXT:    ands w8, w8, w8, asr #31
+; CHECK-SD-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-SD-NEXT:    and w0, w8, #0x1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_i1_f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs w8, d0
+; CHECK-GI-NEXT:    cmp w8, #0
+; CHECK-GI-NEXT:    csel w8, w8, wzr, lt
+; CHECK-GI-NEXT:    cmp w8, #0
+; CHECK-GI-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-GI-NEXT:    and w0, w8, #0x1
+; CHECK-GI-NEXT:    ret
     %x = call i1 @llvm.fptosi.sat.i1.f64(double %f)
     ret i1 %x
 }
@@ -275,16 +356,27 @@ define i32 @test_signed_i32_f64(double %f) nounwind {
 }
 
 define i50 @test_signed_i50_f64(double %f) nounwind {
-; CHECK-LABEL: test_signed_i50_f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs x8, d0
-; CHECK-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
-; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    csel x8, x8, x9, lt
-; CHECK-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
-; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    csel x0, x8, x9, gt
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i50_f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs x8, d0
+; CHECK-SD-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-SD-NEXT:    cmp x8, x9
+; CHECK-SD-NEXT:    csel x8, x8, x9, lt
+; CHECK-SD-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
+; CHECK-SD-NEXT:    cmp x8, x9
+; CHECK-SD-NEXT:    csel x0, x8, x9, gt
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_i50_f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs x8, d0
+; CHECK-GI-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-GI-NEXT:    mov x10, #-562949953421312 // =0xfffe000000000000
+; CHECK-GI-NEXT:    cmp x8, x9
+; CHECK-GI-NEXT:    csel x8, x8, x9, lt
+; CHECK-GI-NEXT:    cmp x8, x10
+; CHECK-GI-NEXT:    csel x0, x8, x10, gt
+; CHECK-GI-NEXT:    ret
     %x = call i50 @llvm.fptosi.sat.i50.f64(double %f)
     ret i50 %x
 }
@@ -299,59 +391,109 @@ define i64 @test_signed_i64_f64(double %f) nounwind {
 }
 
 define i100 @test_signed_i100_f64(double %f) nounwind {
-; CHECK-LABEL: test_signed_i100_f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov d8, d0
-; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    mov x8, #-4170333254945079296 // =0xc620000000000000
-; CHECK-NEXT:    mov x10, #34359738367 // =0x7ffffffff
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov x8, #5053038781909696511 // =0x461fffffffffffff
-; CHECK-NEXT:    fcmp d8, d0
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov x8, #-34359738368 // =0xfffffff800000000
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    csel x8, x8, x1, lt
-; CHECK-NEXT:    fcmp d8, d0
-; CHECK-NEXT:    csel x8, x10, x8, gt
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    fcmp d8, d8
-; CHECK-NEXT:    csel x0, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i100_f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov d8, d0
+; CHECK-SD-NEXT:    bl __fixdfti
+; CHECK-SD-NEXT:    mov x8, #-4170333254945079296 // =0xc620000000000000
+; CHECK-SD-NEXT:    mov x10, #34359738367 // =0x7ffffffff
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    mov x8, #5053038781909696511 // =0x461fffffffffffff
+; CHECK-SD-NEXT:    fcmp d8, d0
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    mov x8, #-34359738368 // =0xfffffff800000000
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x8, x8, x1, lt
+; CHECK-SD-NEXT:    fcmp d8, d0
+; CHECK-SD-NEXT:    csel x8, x10, x8, gt
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    fcmp d8, d8
+; CHECK-SD-NEXT:    csel x0, xzr, x9, vs
+; CHECK-SD-NEXT:    csel x1, xzr, x8, vs
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_i100_f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    fmov d8, d0
+; CHECK-GI-NEXT:    bl __fixdfti
+; CHECK-GI-NEXT:    mov x8, #-4170333254945079296 // =0xc620000000000000
+; CHECK-GI-NEXT:    mov x10, #34359738367 // =0x7ffffffff
+; CHECK-GI-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    mov x8, #5053038781909696511 // =0x461fffffffffffff
+; CHECK-GI-NEXT:    fcmp d8, d0
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    mov x8, #34359738368 // =0x800000000
+; CHECK-GI-NEXT:    csel x9, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x8, x8, x1, lt
+; CHECK-GI-NEXT:    fcmp d8, d0
+; CHECK-GI-NEXT:    csinv x9, x9, xzr, le
+; CHECK-GI-NEXT:    csel x8, x10, x8, gt
+; CHECK-GI-NEXT:    fcmp d8, d8
+; CHECK-GI-NEXT:    csel x0, xzr, x9, vs
+; CHECK-GI-NEXT:    csel x1, xzr, x8, vs
+; CHECK-GI-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
     %x = call i100 @llvm.fptosi.sat.i100.f64(double %f)
     ret i100 %x
 }
 
 define i128 @test_signed_i128_f64(double %f) nounwind {
-; CHECK-LABEL: test_signed_i128_f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov d8, d0
-; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    mov x8, #-4044232465378705408 // =0xc7e0000000000000
-; CHECK-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov x8, #5179139571476070399 // =0x47dfffffffffffff
-; CHECK-NEXT:    fcmp d8, d0
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    csel x8, x8, x1, lt
-; CHECK-NEXT:    fcmp d8, d0
-; CHECK-NEXT:    csel x8, x10, x8, gt
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    fcmp d8, d8
-; CHECK-NEXT:    csel x0, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i128_f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov d8, d0
+; CHECK-SD-NEXT:    bl __fixdfti
+; CHECK-SD-NEXT:    mov x8, #-4044232465378705408 // =0xc7e0000000000000
+; CHECK-SD-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    mov x8, #5179139571476070399 // =0x47dfffffffffffff
+; CHECK-SD-NEXT:    fcmp d8, d0
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x8, x8, x1, lt
+; CHECK-SD-NEXT:    fcmp d8, d0
+; CHECK-SD-NEXT:    csel x8, x10, x8, gt
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    fcmp d8, d8
+; CHECK-SD-NEXT:    csel x0, xzr, x9, vs
+; CHECK-SD-NEXT:    csel x1, xzr, x8, vs
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_i128_f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    fmov d8, d0
+; CHECK-GI-NEXT:    bl __fixdfti
+; CHECK-GI-NEXT:    mov x8, #-4044232465378705408 // =0xc7e0000000000000
+; CHECK-GI-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-GI-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    mov x8, #5179139571476070399 // =0x47dfffffffffffff
+; CHECK-GI-NEXT:    fcmp d8, d0
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-GI-NEXT:    csel x9, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x8, x8, x1, lt
+; CHECK-GI-NEXT:    fcmp d8, d0
+; CHECK-GI-NEXT:    csinv x9, x9, xzr, le
+; CHECK-GI-NEXT:    csel x8, x10, x8, gt
+; CHECK-GI-NEXT:    fcmp d8, d8
+; CHECK-GI-NEXT:    csel x0, xzr, x9, vs
+; CHECK-GI-NEXT:    csel x1, xzr, x8, vs
+; CHECK-GI-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
     %x = call i128 @llvm.fptosi.sat.i128.f64(double %f)
     ret i128 %x
 }
@@ -372,245 +514,429 @@ declare i100 @llvm.fptosi.sat.i100.f16(half)
 declare i128 @llvm.fptosi.sat.i128.f16(half)
 
 define i1 @test_signed_i1_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i1_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzs w8, s0
-; CHECK-CVT-NEXT:    ands w8, w8, w8, asr #31
-; CHECK-CVT-NEXT:    csinv w8, w8, wzr, ge
-; CHECK-CVT-NEXT:    and w0, w8, #0x1
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_i1_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs w8, h0
-; CHECK-FP16-NEXT:    ands w8, w8, w8, asr #31
-; CHECK-FP16-NEXT:    csinv w8, w8, wzr, ge
-; CHECK-FP16-NEXT:    and w0, w8, #0x1
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_i1_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvtzs w8, s0
+; CHECK-SD-CVT-NEXT:    ands w8, w8, w8, asr #31
+; CHECK-SD-CVT-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-SD-CVT-NEXT:    and w0, w8, #0x1
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i1_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs w8, h0
+; CHECK-SD-FP16-NEXT:    ands w8, w8, w8, asr #31
+; CHECK-SD-FP16-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-SD-FP16-NEXT:    and w0, w8, #0x1
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i1_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzs w8, s0
+; CHECK-GI-CVT-NEXT:    cmp w8, #0
+; CHECK-GI-CVT-NEXT:    csel w8, w8, wzr, lt
+; CHECK-GI-CVT-NEXT:    cmp w8, #0
+; CHECK-GI-CVT-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-GI-CVT-NEXT:    and w0, w8, #0x1
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i1_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs w8, h0
+; CHECK-GI-FP16-NEXT:    cmp w8, #0
+; CHECK-GI-FP16-NEXT:    csel w8, w8, wzr, lt
+; CHECK-GI-FP16-NEXT:    cmp w8, #0
+; CHECK-GI-FP16-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-GI-FP16-NEXT:    and w0, w8, #0x1
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i1 @llvm.fptosi.sat.i1.f16(half %f)
     ret i1 %x
 }
 
 define i8 @test_signed_i8_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i8_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov w8, #127 // =0x7f
-; CHECK-CVT-NEXT:    fcvtzs w9, s0
-; CHECK-CVT-NEXT:    cmp w9, #127
-; CHECK-CVT-NEXT:    csel w8, w9, w8, lt
-; CHECK-CVT-NEXT:    mov w9, #-128 // =0xffffff80
-; CHECK-CVT-NEXT:    cmn w8, #128
-; CHECK-CVT-NEXT:    csel w0, w8, w9, gt
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_i8_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs w9, h0
-; CHECK-FP16-NEXT:    mov w8, #127 // =0x7f
-; CHECK-FP16-NEXT:    cmp w9, #127
-; CHECK-FP16-NEXT:    csel w8, w9, w8, lt
-; CHECK-FP16-NEXT:    mov w9, #-128 // =0xffffff80
-; CHECK-FP16-NEXT:    cmn w8, #128
-; CHECK-FP16-NEXT:    csel w0, w8, w9, gt
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_i8_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov w8, #127 // =0x7f
+; CHECK-SD-CVT-NEXT:    fcvtzs w9, s0
+; CHECK-SD-CVT-NEXT:    cmp w9, #127
+; CHECK-SD-CVT-NEXT:    csel w8, w9, w8, lt
+; CHECK-SD-CVT-NEXT:    mov w9, #-128 // =0xffffff80
+; CHECK-SD-CVT-NEXT:    cmn w8, #128
+; CHECK-SD-CVT-NEXT:    csel w0, w8, w9, gt
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i8_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs w9, h0
+; CHECK-SD-FP16-NEXT:    mov w8, #127 // =0x7f
+; CHECK-SD-FP16-NEXT:    cmp w9, #127
+; CHECK-SD-FP16-NEXT:    csel w8, w9, w8, lt
+; CHECK-SD-FP16-NEXT:    mov w9, #-128 // =0xffffff80
+; CHECK-SD-FP16-NEXT:    cmn w8, #128
+; CHECK-SD-FP16-NEXT:    csel w0, w8, w9, gt
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i8_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov w8, #127 // =0x7f
+; CHECK-GI-CVT-NEXT:    fcvtzs w9, s0
+; CHECK-GI-CVT-NEXT:    cmp w9, #127
+; CHECK-GI-CVT-NEXT:    csel w8, w9, w8, lt
+; CHECK-GI-CVT-NEXT:    mov w9, #-128 // =0xffffff80
+; CHECK-GI-CVT-NEXT:    cmn w8, #128
+; CHECK-GI-CVT-NEXT:    csel w0, w8, w9, gt
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i8_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs w9, h0
+; CHECK-GI-FP16-NEXT:    mov w8, #127 // =0x7f
+; CHECK-GI-FP16-NEXT:    cmp w9, #127
+; CHECK-GI-FP16-NEXT:    csel w8, w9, w8, lt
+; CHECK-GI-FP16-NEXT:    mov w9, #-128 // =0xffffff80
+; CHECK-GI-FP16-NEXT:    cmn w8, #128
+; CHECK-GI-FP16-NEXT:    csel w0, w8, w9, gt
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i8 @llvm.fptosi.sat.i8.f16(half %f)
     ret i8 %x
 }
 
 define i13 @test_signed_i13_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i13_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov w8, #4095 // =0xfff
-; CHECK-CVT-NEXT:    fcvtzs w9, s0
-; CHECK-CVT-NEXT:    cmp w9, #4095
-; CHECK-CVT-NEXT:    csel w8, w9, w8, lt
-; CHECK-CVT-NEXT:    mov w9, #-4096 // =0xfffff000
-; CHECK-CVT-NEXT:    cmn w8, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT:    csel w0, w8, w9, gt
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_i13_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs w9, h0
-; CHECK-FP16-NEXT:    mov w8, #4095 // =0xfff
-; CHECK-FP16-NEXT:    cmp w9, #4095
-; CHECK-FP16-NEXT:    csel w8, w9, w8, lt
-; CHECK-FP16-NEXT:    mov w9, #-4096 // =0xfffff000
-; CHECK-FP16-NEXT:    cmn w8, #1, lsl #12 // =4096
-; CHECK-FP16-NEXT:    csel w0, w8, w9, gt
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_i13_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov w8, #4095 // =0xfff
+; CHECK-SD-CVT-NEXT:    fcvtzs w9, s0
+; CHECK-SD-CVT-NEXT:    cmp w9, #4095
+; CHECK-SD-CVT-NEXT:    csel w8, w9, w8, lt
+; CHECK-SD-CVT-NEXT:    mov w9, #-4096 // =0xfffff000
+; CHECK-SD-CVT-NEXT:    cmn w8, #1, lsl #12 // =4096
+; CHECK-SD-CVT-NEXT:    csel w0, w8, w9, gt
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i13_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs w9, h0
+; CHECK-SD-FP16-NEXT:    mov w8, #4095 // =0xfff
+; CHECK-SD-FP16-NEXT:    cmp w9, #4095
+; CHECK-SD-FP16-NEXT:    csel w8, w9, w8, lt
+; CHECK-SD-FP16-NEXT:    mov w9, #-4096 // =0xfffff000
+; CHECK-SD-FP16-NEXT:    cmn w8, #1, lsl #12 // =4096
+; CHECK-SD-FP16-NEXT:    csel w0, w8, w9, gt
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i13_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov w8, #4095 // =0xfff
+; CHECK-GI-CVT-NEXT:    fcvtzs w9, s0
+; CHECK-GI-CVT-NEXT:    cmp w9, #4095
+; CHECK-GI-CVT-NEXT:    csel w8, w9, w8, lt
+; CHECK-GI-CVT-NEXT:    mov w9, #-4096 // =0xfffff000
+; CHECK-GI-CVT-NEXT:    cmn w8, #1, lsl #12 // =4096
+; CHECK-GI-CVT-NEXT:    csel w0, w8, w9, gt
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i13_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs w9, h0
+; CHECK-GI-FP16-NEXT:    mov w8, #4095 // =0xfff
+; CHECK-GI-FP16-NEXT:    cmp w9, #4095
+; CHECK-GI-FP16-NEXT:    csel w8, w9, w8, lt
+; CHECK-GI-FP16-NEXT:    mov w9, #-4096 // =0xfffff000
+; CHECK-GI-FP16-NEXT:    cmn w8, #1, lsl #12 // =4096
+; CHECK-GI-FP16-NEXT:    csel w0, w8, w9, gt
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i13 @llvm.fptosi.sat.i13.f16(half %f)
     ret i13 %x
 }
 
 define i16 @test_signed_i16_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i16_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov w9, #32767 // =0x7fff
-; CHECK-CVT-NEXT:    fcvtzs w8, s0
-; CHECK-CVT-NEXT:    cmp w8, w9
-; CHECK-CVT-NEXT:    csel w8, w8, w9, lt
-; CHECK-CVT-NEXT:    mov w9, #-32768 // =0xffff8000
-; CHECK-CVT-NEXT:    cmn w8, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    csel w0, w8, w9, gt
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_i16_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs w8, h0
-; CHECK-FP16-NEXT:    mov w9, #32767 // =0x7fff
-; CHECK-FP16-NEXT:    cmp w8, w9
-; CHECK-FP16-NEXT:    csel w8, w8, w9, lt
-; CHECK-FP16-NEXT:    mov w9, #-32768 // =0xffff8000
-; CHECK-FP16-NEXT:    cmn w8, #8, lsl #12 // =32768
-; CHECK-FP16-NEXT:    csel w0, w8, w9, gt
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_i16_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov w9, #32767 // =0x7fff
+; CHECK-SD-CVT-NEXT:    fcvtzs w8, s0
+; CHECK-SD-CVT-NEXT:    cmp w8, w9
+; CHECK-SD-CVT-NEXT:    csel w8, w8, w9, lt
+; CHECK-SD-CVT-NEXT:    mov w9, #-32768 // =0xffff8000
+; CHECK-SD-CVT-NEXT:    cmn w8, #8, lsl #12 // =32768
+; CHECK-SD-CVT-NEXT:    csel w0, w8, w9, gt
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i16_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs w8, h0
+; CHECK-SD-FP16-NEXT:    mov w9, #32767 // =0x7fff
+; CHECK-SD-FP16-NEXT:    cmp w8, w9
+; CHECK-SD-FP16-NEXT:    csel w8, w8, w9, lt
+; CHECK-SD-FP16-NEXT:    mov w9, #-32768 // =0xffff8000
+; CHECK-SD-FP16-NEXT:    cmn w8, #8, lsl #12 // =32768
+; CHECK-SD-FP16-NEXT:    csel w0, w8, w9, gt
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i16_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov w9, #32767 // =0x7fff
+; CHECK-GI-CVT-NEXT:    fcvtzs w8, s0
+; CHECK-GI-CVT-NEXT:    cmp w8, w9
+; CHECK-GI-CVT-NEXT:    csel w8, w8, w9, lt
+; CHECK-GI-CVT-NEXT:    mov w9, #-32768 // =0xffff8000
+; CHECK-GI-CVT-NEXT:    cmn w8, #8, lsl #12 // =32768
+; CHECK-GI-CVT-NEXT:    csel w0, w8, w9, gt
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i16_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs w8, h0
+; CHECK-GI-FP16-NEXT:    mov w9, #32767 // =0x7fff
+; CHECK-GI-FP16-NEXT:    cmp w8, w9
+; CHECK-GI-FP16-NEXT:    csel w8, w8, w9, lt
+; CHECK-GI-FP16-NEXT:    mov w9, #-32768 // =0xffff8000
+; CHECK-GI-FP16-NEXT:    cmn w8, #8, lsl #12 // =32768
+; CHECK-GI-FP16-NEXT:    csel w0, w8, w9, gt
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i16 @llvm.fptosi.sat.i16.f16(half %f)
     ret i16 %x
 }
 
 define i19 @test_signed_i19_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i19_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov w9, #262143 // =0x3ffff
-; CHECK-CVT-NEXT:    fcvtzs w8, s0
-; CHECK-CVT-NEXT:    cmp w8, w9
-; CHECK-CVT-NEXT:    csel w8, w8, w9, lt
-; CHECK-CVT-NEXT:    mov w9, #-262144 // =0xfffc0000
-; CHECK-CVT-NEXT:    cmn w8, #64, lsl #12 // =262144
-; CHECK-CVT-NEXT:    csel w0, w8, w9, gt
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_i19_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs w8, h0
-; CHECK-FP16-NEXT:    mov w9, #262143 // =0x3ffff
-; CHECK-FP16-NEXT:    cmp w8, w9
-; CHECK-FP16-NEXT:    csel w8, w8, w9, lt
-; CHECK-FP16-NEXT:    mov w9, #-262144 // =0xfffc0000
-; CHECK-FP16-NEXT:    cmn w8, #64, lsl #12 // =262144
-; CHECK-FP16-NEXT:    csel w0, w8, w9, gt
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_i19_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov w9, #262143 // =0x3ffff
+; CHECK-SD-CVT-NEXT:    fcvtzs w8, s0
+; CHECK-SD-CVT-NEXT:    cmp w8, w9
+; CHECK-SD-CVT-NEXT:    csel w8, w8, w9, lt
+; CHECK-SD-CVT-NEXT:    mov w9, #-262144 // =0xfffc0000
+; CHECK-SD-CVT-NEXT:    cmn w8, #64, lsl #12 // =262144
+; CHECK-SD-CVT-NEXT:    csel w0, w8, w9, gt
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i19_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs w8, h0
+; CHECK-SD-FP16-NEXT:    mov w9, #262143 // =0x3ffff
+; CHECK-SD-FP16-NEXT:    cmp w8, w9
+; CHECK-SD-FP16-NEXT:    csel w8, w8, w9, lt
+; CHECK-SD-FP16-NEXT:    mov w9, #-262144 // =0xfffc0000
+; CHECK-SD-FP16-NEXT:    cmn w8, #64, lsl #12 // =262144
+; CHECK-SD-FP16-NEXT:    csel w0, w8, w9, gt
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i19_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov w9, #262143 // =0x3ffff
+; CHECK-GI-CVT-NEXT:    fcvtzs w8, s0
+; CHECK-GI-CVT-NEXT:    cmp w8, w9
+; CHECK-GI-CVT-NEXT:    csel w8, w8, w9, lt
+; CHECK-GI-CVT-NEXT:    mov w9, #-262144 // =0xfffc0000
+; CHECK-GI-CVT-NEXT:    cmn w8, #64, lsl #12 // =262144
+; CHECK-GI-CVT-NEXT:    csel w0, w8, w9, gt
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i19_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs w8, h0
+; CHECK-GI-FP16-NEXT:    mov w9, #262143 // =0x3ffff
+; CHECK-GI-FP16-NEXT:    cmp w8, w9
+; CHECK-GI-FP16-NEXT:    csel w8, w8, w9, lt
+; CHECK-GI-FP16-NEXT:    mov w9, #-262144 // =0xfffc0000
+; CHECK-GI-FP16-NEXT:    cmn w8, #64, lsl #12 // =262144
+; CHECK-GI-FP16-NEXT:    csel w0, w8, w9, gt
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i19 @llvm.fptosi.sat.i19.f16(half %f)
     ret i19 %x
 }
 
 define i32 @test_signed_i32_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i32_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzs w0, s0
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_i32_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs w0, h0
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_i32_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvtzs w0, s0
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i32_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs w0, h0
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i32_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzs w0, s0
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i32_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs w0, h0
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i32 @llvm.fptosi.sat.i32.f16(half %f)
     ret i32 %x
 }
 
 define i50 @test_signed_i50_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i50_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
-; CHECK-CVT-NEXT:    fcvtzs x8, s0
-; CHECK-CVT-NEXT:    cmp x8, x9
-; CHECK-CVT-NEXT:    csel x8, x8, x9, lt
-; CHECK-CVT-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
-; CHECK-CVT-NEXT:    cmp x8, x9
-; CHECK-CVT-NEXT:    csel x0, x8, x9, gt
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_i50_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs x8, h0
-; CHECK-FP16-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
-; CHECK-FP16-NEXT:    cmp x8, x9
-; CHECK-FP16-NEXT:    csel x8, x8, x9, lt
-; CHECK-FP16-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
-; CHECK-FP16-NEXT:    cmp x8, x9
-; CHECK-FP16-NEXT:    csel x0, x8, x9, gt
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_i50_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-SD-CVT-NEXT:    fcvtzs x8, s0
+; CHECK-SD-CVT-NEXT:    cmp x8, x9
+; CHECK-SD-CVT-NEXT:    csel x8, x8, x9, lt
+; CHECK-SD-CVT-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
+; CHECK-SD-CVT-NEXT:    cmp x8, x9
+; CHECK-SD-CVT-NEXT:    csel x0, x8, x9, gt
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i50_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs x8, h0
+; CHECK-SD-FP16-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-SD-FP16-NEXT:    cmp x8, x9
+; CHECK-SD-FP16-NEXT:    csel x8, x8, x9, lt
+; CHECK-SD-FP16-NEXT:    mov x9, #-562949953421312 // =0xfffe000000000000
+; CHECK-SD-FP16-NEXT:    cmp x8, x9
+; CHECK-SD-FP16-NEXT:    csel x0, x8, x9, gt
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i50_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-GI-CVT-NEXT:    mov x10, #-562949953421312 // =0xfffe000000000000
+; CHECK-GI-CVT-NEXT:    fcvtzs x8, s0
+; CHECK-GI-CVT-NEXT:    cmp x8, x9
+; CHECK-GI-CVT-NEXT:    csel x8, x8, x9, lt
+; CHECK-GI-CVT-NEXT:    cmp x8, x10
+; CHECK-GI-CVT-NEXT:    csel x0, x8, x10, gt
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i50_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs x8, h0
+; CHECK-GI-FP16-NEXT:    mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-GI-FP16-NEXT:    mov x10, #-562949953421312 // =0xfffe000000000000
+; CHECK-GI-FP16-NEXT:    cmp x8, x9
+; CHECK-GI-FP16-NEXT:    csel x8, x8, x9, lt
+; CHECK-GI-FP16-NEXT:    cmp x8, x10
+; CHECK-GI-FP16-NEXT:    csel x0, x8, x10, gt
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i50 @llvm.fptosi.sat.i50.f16(half %f)
     ret i50 %x
 }
 
 define i64 @test_signed_i64_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i64_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzs x0, s0
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_signed_i64_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs x0, h0
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_i64_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvtzs x0, s0
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i64_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i64_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzs x0, s0
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i64_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i64 @llvm.fptosi.sat.i64.f16(half %f)
     ret i64 %x
 }
 
 define i100 @test_signed_i100_f16(half %f) nounwind {
-; CHECK-LABEL: test_signed_i100_f16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    movi v0.2s, #241, lsl #24
-; CHECK-NEXT:    mov w8, #1895825407 // =0x70ffffff
-; CHECK-NEXT:    mov x10, #34359738367 // =0x7ffffffff
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov x8, #-34359738368 // =0xfffffff800000000
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    csel x8, x8, x1, lt
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    csel x8, x10, x8, gt
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    csel x0, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i100_f16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    movi v0.2s, #241, lsl #24
+; CHECK-SD-NEXT:    mov w8, #1895825407 // =0x70ffffff
+; CHECK-SD-NEXT:    mov x10, #34359738367 // =0x7ffffffff
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov x8, #-34359738368 // =0xfffffff800000000
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x8, x8, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    csel x8, x10, x8, gt
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    csel x0, xzr, x9, vs
+; CHECK-SD-NEXT:    csel x1, xzr, x8, vs
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i100_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov x1, xzr
+; CHECK-GI-CVT-NEXT:    fcvtzs x0, s0
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i100_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-GI-FP16-NEXT:    mov x1, xzr
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i100 @llvm.fptosi.sat.i100.f16(half %f)
     ret i100 %x
 }
 
 define i128 @test_signed_i128_f16(half %f) nounwind {
-; CHECK-LABEL: test_signed_i128_f16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    movi v0.2s, #255, lsl #24
-; CHECK-NEXT:    mov w8, #2130706431 // =0x7effffff
-; CHECK-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    csel x8, x8, x1, lt
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    csel x8, x10, x8, gt
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    csel x0, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_i128_f16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    movi v0.2s, #255, lsl #24
+; CHECK-SD-NEXT:    mov w8, #2130706431 // =0x7effffff
+; CHECK-SD-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x8, x8, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    csel x8, x10, x8, gt
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    csel x0, xzr, x9, vs
+; CHECK-SD-NEXT:    csel x1, xzr, x8, vs
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i128_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov x1, xzr
+; CHECK-GI-CVT-NEXT:    fcvtzs x0, s0
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i128_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-GI-FP16-NEXT:    mov x1, xzr
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i128 @llvm.fptosi.sat.i128.f16(half %f)
     ret i128 %x
 }
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
index 1e1e7327f71fdc..ce6eba88838349 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
+; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
 
 ;
 ; 32-bit float to unsigned integer
@@ -18,12 +20,20 @@ declare i100 @llvm.fptoui.sat.i100.f32(float)
 declare i128 @llvm.fptoui.sat.i128.f32(float)
 
 define i1 @test_unsigned_i1_f32(float %f) nounwind {
-; CHECK-LABEL: test_unsigned_i1_f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu w8, s0
-; CHECK-NEXT:    cmp w8, #1
-; CHECK-NEXT:    csinc w0, w8, wzr, lo
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_i1_f32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu w8, s0
+; CHECK-SD-NEXT:    cmp w8, #1
+; CHECK-SD-NEXT:    csinc w0, w8, wzr, lo
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_i1_f32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzu w8, s0
+; CHECK-GI-NEXT:    cmp w8, #1
+; CHECK-GI-NEXT:    csinc w8, w8, wzr, lo
+; CHECK-GI-NEXT:    and w0, w8, #0x1
+; CHECK-GI-NEXT:    ret
     %x = call i1 @llvm.fptoui.sat.i1.f32(float %f)
     ret i1 %x
 }
@@ -107,46 +117,83 @@ define i64 @test_unsigned_i64_f32(float %f) nounwind {
 }
 
 define i100 @test_unsigned_i100_f32(float %f) nounwind {
-; CHECK-LABEL: test_unsigned_i100_f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov s8, s0
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    mov w8, #1904214015 // =0x717fffff
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov x10, #68719476735 // =0xfffffffff
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    csel x1, x10, x9, gt
-; CHECK-NEXT:    csinv x0, x8, xzr, le
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_i100_f32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s8, s0
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    mov w8, #1904214015 // =0x717fffff
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov x10, #68719476735 // =0xfffffffff
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    csel x1, x10, x9, gt
+; CHECK-SD-NEXT:    csinv x0, x8, xzr, le
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_i100_f32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s8, s0
+; CHECK-GI-NEXT:    bl __fixunssfti
+; CHECK-GI-NEXT:    mov w8, #1904214015 // =0x717fffff
+; CHECK-GI-NEXT:    fcmp s8, #0.0
+; CHECK-GI-NEXT:    mov x10, #68719476735 // =0xfffffffff
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp s8, s0
+; CHECK-GI-NEXT:    csinv x0, x8, xzr, le
+; CHECK-GI-NEXT:    csel x1, x10, x9, gt
+; CHECK-GI-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
     %x = call i100 @llvm.fptoui.sat.i100.f32(float %f)
     ret i100 %x
 }
 
 define i128 @test_unsigned_i128_f32(float %f) nounwind {
-; CHECK-LABEL: test_unsigned_i128_f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov s8, s0
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    mov w8, #2139095039 // =0x7f7fffff
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    csinv x0, x9, xzr, le
-; CHECK-NEXT:    csinv x1, x8, xzr, le
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_i128_f32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s8, s0
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    mov w8, #2139095039 // =0x7f7fffff
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    csinv x0, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x1, x8, xzr, le
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_i128_f32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s8, s0
+; CHECK-GI-NEXT:    bl __fixunssfti
+; CHECK-GI-NEXT:    mov w8, #2139095039 // =0x7f7fffff
+; CHECK-GI-NEXT:    fcmp s8, #0.0
+; CHECK-GI-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp s8, s0
+; CHECK-GI-NEXT:    csinv x0, x8, xzr, le
+; CHECK-GI-NEXT:    csinv x1, x9, xzr, le
+; CHECK-GI-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
     %x = call i128 @llvm.fptoui.sat.i128.f32(float %f)
     ret i128 %x
 }
@@ -167,12 +214,20 @@ declare i100 @llvm.fptoui.sat.i100.f64(double)
 declare i128 @llvm.fptoui.sat.i128.f64(double)
 
 define i1 @test_unsigned_i1_f64(double %f) nounwind {
-; CHECK-LABEL: test_unsigned_i1_f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu w8, d0
-; CHECK-NEXT:    cmp w8, #1
-; CHECK-NEXT:    csinc w0, w8, wzr, lo
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_i1_f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu w8, d0
+; CHECK-SD-NEXT:    cmp w8, #1
+; CHECK-SD-NEXT:    csinc w0, w8, wzr, lo
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_i1_f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzu w8, d0
+; CHECK-GI-NEXT:    cmp w8, #1
+; CHECK-GI-NEXT:    csinc w8, w8, wzr, lo
+; CHECK-GI-NEXT:    and w0, w8, #0x1
+; CHECK-GI-NEXT:    ret
     %x = call i1 @llvm.fptoui.sat.i1.f64(double %f)
     ret i1 %x
 }
@@ -256,46 +311,83 @@ define i64 @test_unsigned_i64_f64(double %f) nounwind {
 }
 
 define i100 @test_unsigned_i100_f64(double %f) nounwind {
-; CHECK-LABEL: test_unsigned_i100_f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov d8, d0
-; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    mov x8, #5057542381537067007 // =0x462fffffffffffff
-; CHECK-NEXT:    fcmp d8, #0.0
-; CHECK-NEXT:    mov x10, #68719476735 // =0xfffffffff
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp d8, d0
-; CHECK-NEXT:    csel x1, x10, x9, gt
-; CHECK-NEXT:    csinv x0, x8, xzr, le
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_i100_f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov d8, d0
+; CHECK-SD-NEXT:    bl __fixunsdfti
+; CHECK-SD-NEXT:    mov x8, #5057542381537067007 // =0x462fffffffffffff
+; CHECK-SD-NEXT:    fcmp d8, #0.0
+; CHECK-SD-NEXT:    mov x10, #68719476735 // =0xfffffffff
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp d8, d0
+; CHECK-SD-NEXT:    csel x1, x10, x9, gt
+; CHECK-SD-NEXT:    csinv x0, x8, xzr, le
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_i100_f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    fmov d8, d0
+; CHECK-GI-NEXT:    bl __fixunsdfti
+; CHECK-GI-NEXT:    mov x8, #5057542381537067007 // =0x462fffffffffffff
+; CHECK-GI-NEXT:    fcmp d8, #0.0
+; CHECK-GI-NEXT:    mov x10, #68719476735 // =0xfffffffff
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp d8, d0
+; CHECK-GI-NEXT:    csinv x0, x8, xzr, le
+; CHECK-GI-NEXT:    csel x1, x10, x9, gt
+; CHECK-GI-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
     %x = call i100 @llvm.fptoui.sat.i100.f64(double %f)
     ret i100 %x
 }
 
 define i128 @test_unsigned_i128_f64(double %f) nounwind {
-; CHECK-LABEL: test_unsigned_i128_f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov d8, d0
-; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    mov x8, #5183643171103440895 // =0x47efffffffffffff
-; CHECK-NEXT:    fcmp d8, #0.0
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp d8, d0
-; CHECK-NEXT:    csinv x0, x9, xzr, le
-; CHECK-NEXT:    csinv x1, x8, xzr, le
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_i128_f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov d8, d0
+; CHECK-SD-NEXT:    bl __fixunsdfti
+; CHECK-SD-NEXT:    mov x8, #5183643171103440895 // =0x47efffffffffffff
+; CHECK-SD-NEXT:    fcmp d8, #0.0
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp d8, d0
+; CHECK-SD-NEXT:    csinv x0, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x1, x8, xzr, le
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_i128_f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    fmov d8, d0
+; CHECK-GI-NEXT:    bl __fixunsdfti
+; CHECK-GI-NEXT:    mov x8, #5183643171103440895 // =0x47efffffffffffff
+; CHECK-GI-NEXT:    fcmp d8, #0.0
+; CHECK-GI-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    csel x8, xzr, x0, lt
+; CHECK-GI-NEXT:    csel x9, xzr, x1, lt
+; CHECK-GI-NEXT:    fcmp d8, d0
+; CHECK-GI-NEXT:    csinv x0, x8, xzr, le
+; CHECK-GI-NEXT:    csinv x1, x9, xzr, le
+; CHECK-GI-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
     %x = call i128 @llvm.fptoui.sat.i128.f64(double %f)
     ret i128 %x
 }
@@ -316,202 +408,352 @@ declare i100 @llvm.fptoui.sat.i100.f16(half)
 declare i128 @llvm.fptoui.sat.i128.f16(half)
 
 define i1 @test_unsigned_i1_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i1_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzu w8, s0
-; CHECK-CVT-NEXT:    cmp w8, #1
-; CHECK-CVT-NEXT:    csinc w0, w8, wzr, lo
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i1_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu w8, h0
-; CHECK-FP16-NEXT:    cmp w8, #1
-; CHECK-FP16-NEXT:    csinc w0, w8, wzr, lo
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i1_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-SD-CVT-NEXT:    cmp w8, #1
+; CHECK-SD-CVT-NEXT:    csinc w0, w8, wzr, lo
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i1_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu w8, h0
+; CHECK-SD-FP16-NEXT:    cmp w8, #1
+; CHECK-SD-FP16-NEXT:    csinc w0, w8, wzr, lo
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i1_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-GI-CVT-NEXT:    cmp w8, #1
+; CHECK-GI-CVT-NEXT:    csinc w8, w8, wzr, lo
+; CHECK-GI-CVT-NEXT:    and w0, w8, #0x1
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i1_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu w8, h0
+; CHECK-GI-FP16-NEXT:    cmp w8, #1
+; CHECK-GI-FP16-NEXT:    csinc w8, w8, wzr, lo
+; CHECK-GI-FP16-NEXT:    and w0, w8, #0x1
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i1 @llvm.fptoui.sat.i1.f16(half %f)
     ret i1 %x
 }
 
 define i8 @test_unsigned_i8_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i8_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov w8, #255 // =0xff
-; CHECK-CVT-NEXT:    fcvtzu w9, s0
-; CHECK-CVT-NEXT:    cmp w9, #255
-; CHECK-CVT-NEXT:    csel w0, w9, w8, lo
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i8_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu w9, h0
-; CHECK-FP16-NEXT:    mov w8, #255 // =0xff
-; CHECK-FP16-NEXT:    cmp w9, #255
-; CHECK-FP16-NEXT:    csel w0, w9, w8, lo
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i8_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov w8, #255 // =0xff
+; CHECK-SD-CVT-NEXT:    fcvtzu w9, s0
+; CHECK-SD-CVT-NEXT:    cmp w9, #255
+; CHECK-SD-CVT-NEXT:    csel w0, w9, w8, lo
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i8_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu w9, h0
+; CHECK-SD-FP16-NEXT:    mov w8, #255 // =0xff
+; CHECK-SD-FP16-NEXT:    cmp w9, #255
+; CHECK-SD-FP16-NEXT:    csel w0, w9, w8, lo
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i8_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov w8, #255 // =0xff
+; CHECK-GI-CVT-NEXT:    fcvtzu w9, s0
+; CHECK-GI-CVT-NEXT:    cmp w9, #255
+; CHECK-GI-CVT-NEXT:    csel w0, w9, w8, lo
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i8_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu w9, h0
+; CHECK-GI-FP16-NEXT:    mov w8, #255 // =0xff
+; CHECK-GI-FP16-NEXT:    cmp w9, #255
+; CHECK-GI-FP16-NEXT:    csel w0, w9, w8, lo
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i8 @llvm.fptoui.sat.i8.f16(half %f)
     ret i8 %x
 }
 
 define i13 @test_unsigned_i13_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i13_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov w9, #8191 // =0x1fff
-; CHECK-CVT-NEXT:    fcvtzu w8, s0
-; CHECK-CVT-NEXT:    cmp w8, w9
-; CHECK-CVT-NEXT:    csel w0, w8, w9, lo
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i13_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu w8, h0
-; CHECK-FP16-NEXT:    mov w9, #8191 // =0x1fff
-; CHECK-FP16-NEXT:    cmp w8, w9
-; CHECK-FP16-NEXT:    csel w0, w8, w9, lo
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i13_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov w9, #8191 // =0x1fff
+; CHECK-SD-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-SD-CVT-NEXT:    cmp w8, w9
+; CHECK-SD-CVT-NEXT:    csel w0, w8, w9, lo
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i13_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu w8, h0
+; CHECK-SD-FP16-NEXT:    mov w9, #8191 // =0x1fff
+; CHECK-SD-FP16-NEXT:    cmp w8, w9
+; CHECK-SD-FP16-NEXT:    csel w0, w8, w9, lo
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i13_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov w9, #8191 // =0x1fff
+; CHECK-GI-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-GI-CVT-NEXT:    cmp w8, w9
+; CHECK-GI-CVT-NEXT:    csel w0, w8, w9, lo
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i13_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu w8, h0
+; CHECK-GI-FP16-NEXT:    mov w9, #8191 // =0x1fff
+; CHECK-GI-FP16-NEXT:    cmp w8, w9
+; CHECK-GI-FP16-NEXT:    csel w0, w8, w9, lo
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i13 @llvm.fptoui.sat.i13.f16(half %f)
     ret i13 %x
 }
 
 define i16 @test_unsigned_i16_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i16_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov w9, #65535 // =0xffff
-; CHECK-CVT-NEXT:    fcvtzu w8, s0
-; CHECK-CVT-NEXT:    cmp w8, w9
-; CHECK-CVT-NEXT:    csel w0, w8, w9, lo
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i16_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu w8, h0
-; CHECK-FP16-NEXT:    mov w9, #65535 // =0xffff
-; CHECK-FP16-NEXT:    cmp w8, w9
-; CHECK-FP16-NEXT:    csel w0, w8, w9, lo
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i16_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov w9, #65535 // =0xffff
+; CHECK-SD-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-SD-CVT-NEXT:    cmp w8, w9
+; CHECK-SD-CVT-NEXT:    csel w0, w8, w9, lo
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i16_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu w8, h0
+; CHECK-SD-FP16-NEXT:    mov w9, #65535 // =0xffff
+; CHECK-SD-FP16-NEXT:    cmp w8, w9
+; CHECK-SD-FP16-NEXT:    csel w0, w8, w9, lo
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i16_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov w9, #65535 // =0xffff
+; CHECK-GI-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-GI-CVT-NEXT:    cmp w8, w9
+; CHECK-GI-CVT-NEXT:    csel w0, w8, w9, lo
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i16_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu w8, h0
+; CHECK-GI-FP16-NEXT:    mov w9, #65535 // =0xffff
+; CHECK-GI-FP16-NEXT:    cmp w8, w9
+; CHECK-GI-FP16-NEXT:    csel w0, w8, w9, lo
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i16 @llvm.fptoui.sat.i16.f16(half %f)
     ret i16 %x
 }
 
 define i19 @test_unsigned_i19_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i19_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov w9, #524287 // =0x7ffff
-; CHECK-CVT-NEXT:    fcvtzu w8, s0
-; CHECK-CVT-NEXT:    cmp w8, w9
-; CHECK-CVT-NEXT:    csel w0, w8, w9, lo
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i19_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu w8, h0
-; CHECK-FP16-NEXT:    mov w9, #524287 // =0x7ffff
-; CHECK-FP16-NEXT:    cmp w8, w9
-; CHECK-FP16-NEXT:    csel w0, w8, w9, lo
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i19_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov w9, #524287 // =0x7ffff
+; CHECK-SD-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-SD-CVT-NEXT:    cmp w8, w9
+; CHECK-SD-CVT-NEXT:    csel w0, w8, w9, lo
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i19_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu w8, h0
+; CHECK-SD-FP16-NEXT:    mov w9, #524287 // =0x7ffff
+; CHECK-SD-FP16-NEXT:    cmp w8, w9
+; CHECK-SD-FP16-NEXT:    csel w0, w8, w9, lo
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i19_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov w9, #524287 // =0x7ffff
+; CHECK-GI-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-GI-CVT-NEXT:    cmp w8, w9
+; CHECK-GI-CVT-NEXT:    csel w0, w8, w9, lo
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i19_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu w8, h0
+; CHECK-GI-FP16-NEXT:    mov w9, #524287 // =0x7ffff
+; CHECK-GI-FP16-NEXT:    cmp w8, w9
+; CHECK-GI-FP16-NEXT:    csel w0, w8, w9, lo
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i19 @llvm.fptoui.sat.i19.f16(half %f)
     ret i19 %x
 }
 
 define i32 @test_unsigned_i32_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i32_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzu w0, s0
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i32_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu w0, h0
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i32_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvtzu w0, s0
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i32_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu w0, h0
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i32_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzu w0, s0
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i32_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu w0, h0
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i32 @llvm.fptoui.sat.i32.f16(half %f)
     ret i32 %x
 }
 
 define i50 @test_unsigned_i50_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i50_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov x9, #1125899906842623 // =0x3ffffffffffff
-; CHECK-CVT-NEXT:    fcvtzu x8, s0
-; CHECK-CVT-NEXT:    cmp x8, x9
-; CHECK-CVT-NEXT:    csel x0, x8, x9, lo
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i50_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu x8, h0
-; CHECK-FP16-NEXT:    mov x9, #1125899906842623 // =0x3ffffffffffff
-; CHECK-FP16-NEXT:    cmp x8, x9
-; CHECK-FP16-NEXT:    csel x0, x8, x9, lo
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i50_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    mov x9, #1125899906842623 // =0x3ffffffffffff
+; CHECK-SD-CVT-NEXT:    fcvtzu x8, s0
+; CHECK-SD-CVT-NEXT:    cmp x8, x9
+; CHECK-SD-CVT-NEXT:    csel x0, x8, x9, lo
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i50_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu x8, h0
+; CHECK-SD-FP16-NEXT:    mov x9, #1125899906842623 // =0x3ffffffffffff
+; CHECK-SD-FP16-NEXT:    cmp x8, x9
+; CHECK-SD-FP16-NEXT:    csel x0, x8, x9, lo
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i50_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov x9, #1125899906842623 // =0x3ffffffffffff
+; CHECK-GI-CVT-NEXT:    fcvtzu x8, s0
+; CHECK-GI-CVT-NEXT:    cmp x8, x9
+; CHECK-GI-CVT-NEXT:    csel x0, x8, x9, lo
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i50_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu x8, h0
+; CHECK-GI-FP16-NEXT:    mov x9, #1125899906842623 // =0x3ffffffffffff
+; CHECK-GI-FP16-NEXT:    cmp x8, x9
+; CHECK-GI-FP16-NEXT:    csel x0, x8, x9, lo
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i50 @llvm.fptoui.sat.i50.f16(half %f)
     ret i50 %x
 }
 
 define i64 @test_unsigned_i64_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i64_f16:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzu x0, s0
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i64_f16:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu x0, h0
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i64_f16:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvtzu x0, s0
+; CHECK-SD-CVT-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i64_f16:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu x0, h0
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i64_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzu x0, s0
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i64_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu x0, h0
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i64 @llvm.fptoui.sat.i64.f16(half %f)
     ret i64 %x
 }
 
 define i100 @test_unsigned_i100_f16(half %f) nounwind {
-; CHECK-LABEL: test_unsigned_i100_f16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    mov w8, #1904214015 // =0x717fffff
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov x10, #68719476735 // =0xfffffffff
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    csel x1, x10, x9, gt
-; CHECK-NEXT:    csinv x0, x8, xzr, le
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_i100_f16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    mov w8, #1904214015 // =0x717fffff
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov x10, #68719476735 // =0xfffffffff
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    csel x1, x10, x9, gt
+; CHECK-SD-NEXT:    csinv x0, x8, xzr, le
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i100_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov x1, xzr
+; CHECK-GI-CVT-NEXT:    fcvtzu x0, s0
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i100_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu x0, h0
+; CHECK-GI-FP16-NEXT:    mov x1, xzr
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i100 @llvm.fptoui.sat.i100.f16(half %f)
     ret i100 %x
 }
 
 define i128 @test_unsigned_i128_f16(half %f) nounwind {
-; CHECK-LABEL: test_unsigned_i128_f16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    mov w8, #2139095039 // =0x7f7fffff
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s0
-; CHECK-NEXT:    csinv x0, x9, xzr, le
-; CHECK-NEXT:    csinv x1, x8, xzr, le
-; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_i128_f16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    mov w8, #2139095039 // =0x7f7fffff
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s0
+; CHECK-SD-NEXT:    csinv x0, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x1, x8, xzr, le
+; CHECK-SD-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i128_f16:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    mov x1, xzr
+; CHECK-GI-CVT-NEXT:    fcvtzu x0, s0
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i128_f16:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    fcvtzu x0, h0
+; CHECK-GI-FP16-NEXT:    mov x1, xzr
+; CHECK-GI-FP16-NEXT:    ret
     %x = call i128 @llvm.fptoui.sat.i128.f16(half %f)
     ret i128 %x
 }
diff --git a/llvm/test/TableGen/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter.td
index 853831366fa531..b9aea33ac96aaa 100644
--- a/llvm/test/TableGen/GlobalISelEmitter.td
+++ b/llvm/test/TableGen/GlobalISelEmitter.td
@@ -513,7 +513,7 @@ def : Pat<(frag GPR32:$src1, complex:$src2, complex:$src3),
 // R00O-NEXT:  GIM_Reject,
 // R00O:       // Label [[DEFAULT_NUM]]: @[[DEFAULT]]
 // R00O-NEXT:  GIM_Reject,
-// R00O-NEXT:  }; // Size: 1816 bytes
+// R00O-NEXT:  }; // Size: 1824 bytes
 
 def INSNBOB : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2, GPR32:$src3, GPR32:$src4),
                  [(set GPR32:$dst,

>From 627e6161ef872ef18da188be62c040f1cbfd74f5 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 5 Sep 2024 19:31:41 +0100
Subject: [PATCH 2/3] Add tablegen patterns

---
 .../Target/GlobalISel/SelectionDAGCompat.td   |  4 +--
 .../include/llvm/Target/TargetSelectionDAG.td |  2 ++
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   | 36 +++++++++++++++++--
 .../GISel/AArch64InstructionSelector.cpp      |  6 ----
 4 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 627c57429941ea..bbe8aa93bf32b6 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -98,8 +98,8 @@ def : GINodeEquiv<G_FPTOSI, fp_to_sint>;
 def : GINodeEquiv<G_FPTOUI, fp_to_uint>;
 def : GINodeEquiv<G_SITOFP, sint_to_fp>;
 def : GINodeEquiv<G_UITOFP, uint_to_fp>;
-def : GINodeEquiv<G_FPTOSI_SAT, fp_to_sint_sat>;
-def : GINodeEquiv<G_FPTOUI_SAT, fp_to_uint_sat>;
+def : GINodeEquiv<G_FPTOSI_SAT, fp_to_sint_sat_gi>;
+def : GINodeEquiv<G_FPTOUI_SAT, fp_to_uint_sat_gi>;
 def : GINodeEquiv<G_FADD, fadd>;
 def : GINodeEquiv<G_FSUB, fsub>;
 def : GINodeEquiv<G_FMA, fma>;
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index dd79002dcbdb48..abb1cc205358f7 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -569,6 +569,8 @@ def fp_to_sint : SDNode<"ISD::FP_TO_SINT" , SDTFPToIntOp>;
 def fp_to_uint : SDNode<"ISD::FP_TO_UINT" , SDTFPToIntOp>;
 def fp_to_sint_sat : SDNode<"ISD::FP_TO_SINT_SAT" , SDTFPToIntSatOp>;
 def fp_to_uint_sat : SDNode<"ISD::FP_TO_UINT_SAT" , SDTFPToIntSatOp>;
+def fp_to_sint_sat_gi : SDNode<"ISD::FP_TO_SINT_SAT" , SDTFPToIntOp>;
+def fp_to_uint_sat_gi : SDNode<"ISD::FP_TO_UINT_SAT" , SDTFPToIntOp>;
 def f16_to_fp  : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
 def fp_to_f16  : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;
 def bf16_to_fp  : SDNode<"ISD::BF16_TO_FP" , SDTIntToFPOp>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index ccef85bfaa8afc..32fa4af9ca7171 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4724,7 +4724,7 @@ defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
 defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
 
 // AArch64's FCVT instructions saturate when out of range.
-multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
+multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
   let Predicates = [HasFullFP16] in {
   def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
             (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
@@ -4740,6 +4740,21 @@ multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
   def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
             (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
 
+  let Predicates = [HasFullFP16] in {
+  def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
+            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
+            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+  }
+  def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
+            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
+            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+  def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
+            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
+            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
   let Predicates = [HasFullFP16] in {
   def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
             (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
@@ -4754,10 +4769,25 @@ multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
             (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
   def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
             (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+
+  let Predicates = [HasFullFP16] in {
+  def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
+            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
+            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+  }
+  def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
+            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
+            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+  def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
+            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
+            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
 }
 
-defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
-defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;
+defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
+defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
 
 multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
   let Predicates = [HasFullFP16] in {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index accadfd352f30e..18361cf3685642 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2152,12 +2152,6 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
     }
     return false;
   }
-  case TargetOpcode::G_FPTOSI_SAT:
-    I.setDesc(TII.get(TargetOpcode::G_FPTOSI));
-    return true;
-  case TargetOpcode::G_FPTOUI_SAT:
-    I.setDesc(TII.get(TargetOpcode::G_FPTOUI));
-    return true;
   default:
     return false;
   }

>From 33d0336e1c923ca39ec6eebd8bcbf03e044cf288 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 6 Sep 2024 17:08:46 +0100
Subject: [PATCH 3/3] Remove Min/Max legality check and add test case for it.

---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    | 25 +++---
 .../test/CodeGen/AArch64/fptosi-sat-scalar.ll | 86 +++++++++++++++++++
 .../test/CodeGen/AArch64/fptoui-sat-scalar.ll | 71 +++++++++++++++
 3 files changed, 171 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 44d28fff47a2b4..9ceae696f5ddec 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7364,19 +7364,22 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
   bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                              !(MaxStatus & APFloat::opStatus::opInexact);
 
-  // If the integer bounds are exactly representable as floats and min/max are
-  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
-  // of comparisons and selects.
-  bool MinMaxLegal = LI.isLegal({TargetOpcode::G_FMINNUM, SrcTy}) &&
-                     LI.isLegal({TargetOpcode::G_FMAXNUM, SrcTy});
-  if (AreExactFloatBounds && MinMaxLegal) {
+  // If the integer bounds are exactly representable as floats, emit a
+  // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
+  // and selects.
+  if (AreExactFloatBounds) {
     // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
-    auto Max = MIRBuilder.buildFMaxNum(
-        SrcTy, Src, MIRBuilder.buildFConstant(SrcTy, MinFloat));
+    auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
+    auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_ULT,
+                                     SrcTy.changeElementSize(1), Src, MaxC);
+    auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
     // Clamp by MaxFloat from above. NaN cannot occur.
-    auto Min = MIRBuilder.buildFMinNum(
-        SrcTy, Max, MIRBuilder.buildFConstant(SrcTy, MaxFloat),
-        MachineInstr::FmNoNans);
+    auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
+    auto MinP =
+        MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Max,
+                             MinC, MachineInstr::FmNoNans);
+    auto Min =
+        MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
     // Convert clamped value to integer. In the unsigned case we're done,
     // because we mapped NaN to MinFloat, which will cast to zero.
     if (!IsSigned) {
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index 588cfca431efe8..9c52b024d3e259 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -940,3 +940,89 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
     %x = call i128 @llvm.fptosi.sat.i128.f16(half %f)
     ret i128 %x
 }
+
+define i32 @test_signed_f128_i32(fp128 %f) {
+; CHECK-SD-LABEL: test_signed_f128_i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #32
+; CHECK-SD-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    adrp x8, .LCPI30_0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI30_0]
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixtfsi
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    mov w8, #-2147483648 // =0x80000000
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel w19, w8, w0, lt
+; CHECK-SD-NEXT:    adrp x8, .LCPI30_1
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI30_1]
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w8, #2147483647 // =0x7fffffff
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w19, w8, w19, gt
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    bl __unordtf2
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w0, wzr, w19, ne
+; CHECK-SD-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    add sp, sp, #32
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_signed_f128_i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #64
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    .cfi_offset b8, -24
+; CHECK-GI-NEXT:    .cfi_offset b9, -32
+; CHECK-GI-NEXT:    adrp x8, .LCPI30_1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI30_1]
+; CHECK-GI-NEXT:    stp q1, q0, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldp q3, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d0, v2.d[1]
+; CHECK-GI-NEXT:    mov d1, v3.d[1]
+; CHECK-GI-NEXT:    fcsel d8, d2, d3, lt
+; CHECK-GI-NEXT:    fmov x8, d8
+; CHECK-GI-NEXT:    fcsel d9, d0, d1, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d9
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    adrp x8, .LCPI30_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI30_0]
+; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d0, v1.d[1]
+; CHECK-GI-NEXT:    fcsel d1, d8, d1, gt
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    fcsel d2, d9, d0, gt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d2
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    bl __fixtfsi
+; CHECK-GI-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov w19, w0
+; CHECK-GI-NEXT:    mov v1.16b, v0.16b
+; CHECK-GI-NEXT:    bl __unordtf2
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    csel w0, wzr, w19, ne
+; CHECK-GI-NEXT:    ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    add sp, sp, #64
+; CHECK-GI-NEXT:    ret
+    %x = call i32 @llvm.fptosi.sat.i32.f128(fp128 %f)
+    ret i32 %x
+}
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
index ce6eba88838349..60f961fa8f9443 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -757,3 +757,74 @@ define i128 @test_unsigned_i128_f16(half %f) nounwind {
     %x = call i128 @llvm.fptoui.sat.i128.f16(half %f)
     ret i128 %x
 }
+
+define i32 @test_unsigned_f128_i32(fp128 %f) {
+; CHECK-SD-LABEL: test_unsigned_f128_i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #32
+; CHECK-SD-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    adrp x8, .LCPI30_0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI30_0]
+; CHECK-SD-NEXT:    bl __getf2
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    bl __fixunstfsi
+; CHECK-SD-NEXT:    adrp x8, .LCPI30_1
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w19, #0
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI30_1]
+; CHECK-SD-NEXT:    csel w19, wzr, w0, lt
+; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csinv w0, w19, wzr, le
+; CHECK-SD-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    add sp, sp, #32
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsigned_f128_i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sub sp, sp, #64
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    .cfi_offset b8, -24
+; CHECK-GI-NEXT:    .cfi_offset b9, -32
+; CHECK-GI-NEXT:    adrp x8, .LCPI30_1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI30_1]
+; CHECK-GI-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    ldp q3, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    mov d0, v3.d[1]
+; CHECK-GI-NEXT:    mov d1, v2.d[1]
+; CHECK-GI-NEXT:    fcsel d8, d3, d2, lt
+; CHECK-GI-NEXT:    fmov x8, d8
+; CHECK-GI-NEXT:    fcsel d9, d0, d1, lt
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d9
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    adrp x8, .LCPI30_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI30_0]
+; CHECK-GI-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cmp w0, #0
+; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    mov d0, v1.d[1]
+; CHECK-GI-NEXT:    fcsel d1, d8, d1, gt
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    fcsel d2, d9, d0, gt
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    fmov x8, d2
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    add sp, sp, #64
+; CHECK-GI-NEXT:    b __fixunstfsi
+    %x = call i32 @llvm.fptoui.sat.i32.f128(fp128 %f)
+    ret i32 %x
+}



More information about the llvm-commits mailing list