[llvm] [AArch64][GlobalISel] Add codegen for simd fpcvt instructions (PR #156892)

via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 8 09:02:00 PDT 2025


https://github.com/Lukacma updated https://github.com/llvm/llvm-project/pull/156892

>From 8534e98a2e0b86ca6849f8213380b70b37cb176c Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Thu, 4 Sep 2025 13:04:57 +0000
Subject: [PATCH 1/4] [AArch64][GlobalISel] Add codegen for simd fpcvt
 instructions

---
 .../lib/Target/AArch64/AArch64InstrFormats.td |   26 +-
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  251 +++-
 .../AArch64/GISel/AArch64RegisterBankInfo.cpp |   54 +-
 .../AArch64/GlobalISel/regbank-fp-use-def.mir |    2 +-
 .../AArch64/GlobalISel/regbank-llround.mir    |    4 +-
 .../AArch64/GlobalISel/regbank-lround.mir     |    4 +-
 .../CodeGen/AArch64/arm64-cvt-simd-fptoi.ll   | 1306 +++++++++++++++++
 .../AArch64/arm64-cvt-simd-intrinsics.ll      |  612 ++++++++
 .../AArch64/arm64-cvt-simd-round-rint.ll      |  428 ++++++
 llvm/test/CodeGen/AArch64/arm64-neon-copy.ll  |   57 +-
 llvm/test/CodeGen/AArch64/arm64-vcvt.ll       |   22 +-
 .../test/CodeGen/AArch64/fptosi-sat-vector.ll |   21 +-
 .../test/CodeGen/AArch64/fptoui-sat-vector.ll |   21 +-
 llvm/test/CodeGen/AArch64/vector-lrint.ll     |   18 +-
 14 files changed, 2735 insertions(+), 91 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
 create mode 100644 llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
 create mode 100644 llvm/test/CodeGen/AArch64/arm64-cvt-simd-round-rint.ll

diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 8958ad129269c..690cb5500875f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -5299,28 +5299,29 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
   }
 }
 
-multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm> {
+multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm,
+                                 SDPatternOperator OpN> {
   // double-precision to 32-bit SIMD/FPR
   def SDr :  BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, FPR32, asm,
-                                     []> {
+             [(set FPR32:$Rd, (i32 (OpN (f64 FPR64:$Rn))))]> {
     let Inst{31} = 0; // 32-bit FPR flag
   }
 
   // half-precision to 32-bit SIMD/FPR
   def SHr :  BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR32, asm,
-                                     []> {
+             [(set FPR32:$Rd, (i32 (OpN (f16 FPR16:$Rn))))]> {
     let Inst{31} = 0; // 32-bit FPR flag
   }
 
   // half-precision to 64-bit SIMD/FPR
   def DHr :  BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR64, asm,
-                                     []> {
+             [(set FPR64:$Rd, (i64 (OpN (f16 FPR16:$Rn))))]> {
     let Inst{31} = 1; // 64-bit FPR flag
   }
 
   // single-precision to 64-bit SIMD/FPR
   def DSr :  BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, FPR64, asm,
-                                     []> {
+             [(set FPR64:$Rd, (i64 (OpN (f32 FPR32:$Rn))))]> {
     let Inst{31} = 1; // 64-bit FPR flag
   }
 }
@@ -7949,6 +7950,21 @@ multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> {
   }
 }
 
+let mayRaiseFPException = 1, Uses = [FPCR] in
+multiclass SIMDFPTwoScalarFCVT<bit U, bit S, bits<5> opc, string asm,
+                               SDPatternOperator OpN> {
+  let Predicates = [HasNEONandIsStreamingSafe], FastISelShouldIgnore = 1 in {
+  def v1i64       : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,
+                    [(set (i64 FPR64:$Rd), (OpN (f64 FPR64:$Rn)))]>;
+  def v1i32       : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,
+                    [(set FPR32:$Rd, (i32 (OpN (f32 FPR32:$Rn))))]>;
+  }
+  let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
+  def v1f16       : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,
+                    [(set FPR16:$Rd, (i16 (OpN (f16 FPR16:$Rn))))]>;
+  }
+}
+
 let mayRaiseFPException = 1, Uses = [FPCR] in
 multiclass SIMDFPTwoScalarCVT<bit U, bit S, bits<5> opc, string asm,
                               SDPatternOperator OpNode> {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 62b26b5239365..34e55dcafcd06 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5212,18 +5212,55 @@ defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
 defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
 defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
 
+defm FCVTAS : SIMDFPTwoScalarFCVT<   0, 0, 0b11100, "fcvtas", int_aarch64_neon_fcvtas>;
+defm FCVTAU : SIMDFPTwoScalarFCVT<   1, 0, 0b11100, "fcvtau", int_aarch64_neon_fcvtau>;
+defm FCVTMS : SIMDFPTwoScalarFCVT<   0, 0, 0b11011, "fcvtms", int_aarch64_neon_fcvtms>;
+defm FCVTMU : SIMDFPTwoScalarFCVT<   1, 0, 0b11011, "fcvtmu", int_aarch64_neon_fcvtmu>;
+defm FCVTNS : SIMDFPTwoScalarFCVT<   0, 0, 0b11010, "fcvtns", int_aarch64_neon_fcvtns>;
+defm FCVTNU : SIMDFPTwoScalarFCVT<   1, 0, 0b11010, "fcvtnu", int_aarch64_neon_fcvtnu>;
+defm FCVTPS : SIMDFPTwoScalarFCVT<   0, 1, 0b11010, "fcvtps", int_aarch64_neon_fcvtps>;
+defm FCVTPU : SIMDFPTwoScalarFCVT<   1, 1, 0b11010, "fcvtpu", int_aarch64_neon_fcvtpu>;
+defm FCVTZS : SIMDFPTwoScalarFCVT<   0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
+defm FCVTZU : SIMDFPTwoScalarFCVT<   1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
+
 let Predicates = [HasNEON, HasFPRCVT] in{
-  defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas">;
-  defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau">;
-  defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms">;
-  defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu">;
-  defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns">;
-  defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu">;
-  defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps">;
-  defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu">;
-  defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">;
-  defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">;
-}
+  defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas", int_aarch64_neon_fcvtas>;
+  defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau", int_aarch64_neon_fcvtau>;
+  defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms", int_aarch64_neon_fcvtms>;
+  defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu", int_aarch64_neon_fcvtmu>;
+  defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns", int_aarch64_neon_fcvtns>;
+  defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu", int_aarch64_neon_fcvtnu>;
+  defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps", int_aarch64_neon_fcvtps>;
+  defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu", int_aarch64_neon_fcvtpu>;
+  defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs", any_fp_to_sint>;
+  defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu", any_fp_to_uint>;
+}
+
+multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
+  def : Pat<(f32 (bitconvert (i32 (OpN (f64 FPR64:$Rn))))),
+            (!cast<Instruction>(INST # SDr) FPR64:$Rn)>;
+  def : Pat<(f32 (bitconvert (i32 (OpN (f16 FPR16:$Rn))))),
+            (!cast<Instruction>(INST # SHr) FPR16:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (OpN (f16 FPR16:$Rn))))),
+            (!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (OpN (f32 FPR32:$Rn))))),
+            (!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
+  def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))),
+            (!cast<Instruction>(INST # v1i32) FPR32:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))),
+            (!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
+
+}
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtas, "FCVTAS">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtau, "FCVTAU">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtms, "FCVTMS">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtmu, "FCVTMU">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtns, "FCVTNS">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtnu, "FCVTNU">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtps, "FCVTPS">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtpu, "FCVTPU">;
+defm: FPToIntegerSIMDScalarPatterns<any_fp_to_sint, "FCVTZS">;
+defm: FPToIntegerSIMDScalarPatterns<any_fp_to_uint, "FCVTZU">;
 
 // AArch64's FCVT instructions saturate when out of range.
 multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
@@ -5257,6 +5294,52 @@ multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string IN
   def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
             (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
 
+  // For global-isel we can use register classes to determine
+  // which FCVT instruction to use.
+  let Predicates = [HasFPRCVT] in {
+  def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
+            (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
+            (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
+            (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+  def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
+            (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+  }
+  def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
+            (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
+            (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+  let Predicates = [HasFPRCVT] in {
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat f16:$Rn, i32)))),
+            (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat f16:$Rn, i64)))),
+            (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat f32:$Rn, i64)))),
+            (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat f64:$Rn, i32)))),
+            (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat_gi f16:$Rn)))),
+            (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat_gi f16:$Rn)))),
+            (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat_gi f32:$Rn)))),
+            (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat_gi f64:$Rn)))),
+            (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+  }
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat f32:$Rn, i32)))),
+            (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat f64:$Rn, i64)))),
+            (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat_gi f32:$Rn)))),
+            (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat_gi f64:$Rn)))),
+            (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
   let Predicates = [HasFullFP16] in {
   def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
             (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
@@ -5301,6 +5384,32 @@ multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
   def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
   def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;
 
+  // For global-isel we can use register classes to determine
+  // which FCVT instruction to use.
+  let Predicates = [HasFPRCVT] in {
+  def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # SHr) $Rn)>;
+  def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # DHr) $Rn)>;
+  def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # DSr) $Rn)>;
+  def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # SDr) $Rn)>;
+  }
+  def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # v1i32) $Rn)>;
+  def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # v1i64) $Rn)>;
+
+  let Predicates = [HasFPRCVT] in {
+  def : Pat<(f32 (bitconvert (i32 (round f16:$Rn)))),
+            (!cast<Instruction>(INST # SHr) $Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (round f16:$Rn)))),
+            (!cast<Instruction>(INST # DHr) $Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (round f32:$Rn)))),
+            (!cast<Instruction>(INST # DSr) $Rn)>;
+  def : Pat<(f32 (bitconvert (i32 (round f64:$Rn)))),
+            (!cast<Instruction>(INST # SDr) $Rn)>;
+  }
+  def : Pat<(f32 (bitconvert (i32 (round f32:$Rn)))),
+            (!cast<Instruction>(INST # v1i32) $Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (round f64:$Rn)))),
+            (!cast<Instruction>(INST # v1i64) $Rn)>;
+
   let Predicates = [HasFullFP16] in {
   def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
             (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
@@ -5330,6 +5439,30 @@ multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, strin
   def : Pat<(i64 (to_int (round f64:$Rn))),
             (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
 
+  // For global-isel we can use register classes to determine
+  // which FCVT instruction to use.
+  def : Pat<(i32 (to_int (round f32:$Rn))),
+            (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+  let Predicates = [HasFPRCVT] in {
+  def : Pat<(i64 (to_int (round f32:$Rn))),
+            (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+  def : Pat<(i32 (to_int (round f64:$Rn))),
+            (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+  }
+  def : Pat<(i64 (to_int (round f64:$Rn))),
+            (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+  let Predicates = [HasFPRCVT] in {
+  def : Pat<(f64 (bitconvert (i64 (to_int (round f32:$Rn))))),
+            (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+  def : Pat<(f32 (bitconvert (i32 (to_int (round f64:$Rn))))),
+            (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+  }
+  def : Pat<(f32 (bitconvert (i32 (to_int (round f32:$Rn))))),
+            (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int (round f64:$Rn))))),
+            (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
   // These instructions saturate like fp_to_[su]int_sat.
   let Predicates = [HasFullFP16] in {
   def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
@@ -5345,6 +5478,21 @@ multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, strin
             (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
   def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
             (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+  let Predicates = [HasFPRCVT] in {
+    def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f16:$Rn), i32)))),
+              (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+    def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f16:$Rn), i64)))),
+              (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+    def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f32:$Rn), i64)))),
+              (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+    def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f64:$Rn), i32)))),
+              (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+  }
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f32:$Rn), i32)))),
+            (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f64:$Rn), i64)))),
+            (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
 }
 
 defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil,  "FCVTPS">;
@@ -5379,6 +5527,39 @@ def : Pat<(i64 (any_llround f32:$Rn)),
 def : Pat<(i64 (any_llround f64:$Rn)),
           (FCVTASUXDr f64:$Rn)>;
 
+// For global-isel we can use register classes to determine
+// which FCVT instruction to use.
+let Predicates = [HasFPRCVT] in {
+def : Pat<(i64 (any_lround f32:$Rn)),
+          (FCVTASDSr f32:$Rn)>;
+def : Pat<(i64 (any_llround f32:$Rn)),
+          (FCVTASDSr f32:$Rn)>;
+}
+def : Pat<(i64 (any_lround f64:$Rn)),
+          (FCVTASv1i64 f64:$Rn)>;
+def : Pat<(i64 (any_llround f64:$Rn)),
+          (FCVTASv1i64 f64:$Rn)>;
+
+let Predicates = [HasFPRCVT] in {
+  def : Pat<(f32 (bitconvert (i32 (any_lround f16:$Rn)))),
+            (FCVTASSHr f16:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (any_lround f16:$Rn)))),
+            (FCVTASDHr f16:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (any_llround f16:$Rn)))),
+            (FCVTASDHr f16:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (any_lround f32:$Rn)))),
+            (FCVTASDSr f32:$Rn)>;
+  def : Pat<(f32 (bitconvert (i32 (any_lround f64:$Rn)))),
+            (FCVTASSDr f64:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (any_llround f32:$Rn)))),
+            (FCVTASDSr f32:$Rn)>;
+}
+def : Pat<(f32 (bitconvert (i32 (any_lround f32:$Rn)))),
+          (FCVTASv1i32 f32:$Rn)>;
+def : Pat<(f64 (bitconvert (i64 (any_lround f64:$Rn)))),
+          (FCVTASv1i64 f64:$Rn)>;
+def : Pat<(f64 (bitconvert (i64 (any_llround f64:$Rn)))),
+          (FCVTASv1i64 f64:$Rn)>;
 //===----------------------------------------------------------------------===//
 // Scaled integer to floating point conversion instructions.
 //===----------------------------------------------------------------------===//
@@ -5524,6 +5705,44 @@ def : Pat<(i64 (any_llrint f32:$Rn)),
 def : Pat<(i64 (any_llrint f64:$Rn)),
           (FCVTZSUXDr (FRINTXDr f64:$Rn))>;
 
+// For global-isel we can use register classes to determine
+// which FCVT instruction to use.
+let Predicates = [HasFPRCVT] in {
+def : Pat<(i64 (any_lrint f16:$Rn)),
+          (FCVTZSDHr (FRINTXHr f16:$Rn))>;
+def : Pat<(i64 (any_llrint f16:$Rn)),
+          (FCVTZSDHr (FRINTXHr f16:$Rn))>;
+def : Pat<(i64 (any_lrint f32:$Rn)),
+          (FCVTZSDSr (FRINTXSr f32:$Rn))>;
+def : Pat<(i64 (any_llrint f32:$Rn)),
+          (FCVTZSDSr (FRINTXSr f32:$Rn))>;
+}
+def : Pat<(i64 (any_lrint f64:$Rn)),
+          (FCVTZSv1i64 (FRINTXDr f64:$Rn))>;
+def : Pat<(i64 (any_llrint f64:$Rn)),
+          (FCVTZSv1i64 (FRINTXDr f64:$Rn))>;
+
+let Predicates = [HasFPRCVT] in {
+  def : Pat<(f32 (bitconvert (i32 (any_lrint f16:$Rn)))),
+            (FCVTZSSHr (FRINTXHr f16:$Rn))>;
+  def : Pat<(f64 (bitconvert (i64 (any_lrint f16:$Rn)))),
+            (FCVTZSDHr (FRINTXHr f16:$Rn))>;
+  def : Pat<(f64 (bitconvert (i64 (any_llrint f16:$Rn)))),
+            (FCVTZSDHr (FRINTXHr f16:$Rn))>;
+  def : Pat<(f64 (bitconvert (i64 (any_lrint f32:$Rn)))),
+            (FCVTZSDSr (FRINTXSr f32:$Rn))>;
+  def : Pat<(f32 (bitconvert (i32 (any_lrint f64:$Rn)))),
+            (FCVTZSSDr (FRINTXDr f64:$Rn))>;
+  def : Pat<(f64 (bitconvert (i64 (any_llrint f32:$Rn)))),
+            (FCVTZSDSr (FRINTXSr f32:$Rn))>;
+}
+def : Pat<(f32 (bitconvert (i32 (any_lrint f32:$Rn)))),
+          (FCVTZSv1i32 (FRINTXSr f32:$Rn))>;
+def : Pat<(f64 (bitconvert (i64 (any_lrint f64:$Rn)))),
+          (FCVTZSv1i64 (FRINTXDr f64:$Rn))>;
+def : Pat<(f64 (bitconvert (i64 (any_llrint f64:$Rn)))),
+          (FCVTZSv1i64 (FRINTXDr f64:$Rn))>;
+
 //===----------------------------------------------------------------------===//
 // Floating point two operand instructions.
 //===----------------------------------------------------------------------===//
@@ -6549,17 +6768,7 @@ defm FCMGE  : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
 defm FCMGT  : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
 defm FCMLE  : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
 defm FCMLT  : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
-defm FCVTAS : SIMDFPTwoScalar<   0, 0, 0b11100, "fcvtas">;
-defm FCVTAU : SIMDFPTwoScalar<   1, 0, 0b11100, "fcvtau">;
-defm FCVTMS : SIMDFPTwoScalar<   0, 0, 0b11011, "fcvtms">;
-defm FCVTMU : SIMDFPTwoScalar<   1, 0, 0b11011, "fcvtmu">;
-defm FCVTNS : SIMDFPTwoScalar<   0, 0, 0b11010, "fcvtns">;
-defm FCVTNU : SIMDFPTwoScalar<   1, 0, 0b11010, "fcvtnu">;
-defm FCVTPS : SIMDFPTwoScalar<   0, 1, 0b11010, "fcvtps">;
-defm FCVTPU : SIMDFPTwoScalar<   1, 1, 0b11010, "fcvtpu">;
 def  FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
-defm FCVTZS : SIMDFPTwoScalar<   0, 1, 0b11011, "fcvtzs">;
-defm FCVTZU : SIMDFPTwoScalar<   1, 1, 0b11011, "fcvtzu">;
 defm FRECPE : SIMDFPTwoScalar<   0, 1, 0b11101, "frecpe">;
 defm FRECPX : SIMDFPTwoScalar<   0, 1, 0b11111, "frecpx">;
 defm FRSQRTE : SIMDFPTwoScalar<  1, 1, 0b11101, "frsqrte">;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index cf391c446a955..c75a3c406f60d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -568,9 +568,7 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
     case Intrinsic::aarch64_neon_fcvtnu:
     case Intrinsic::aarch64_neon_fcvtps:
     case Intrinsic::aarch64_neon_fcvtpu:
-      // Force FPR register bank for half types, as those types otherwise
-      // don't get legalized correctly resulting in fp16 <-> gpr32 COPY's.
-      return MRI.getType(MI.getOperand(2).getReg()) == LLT::float16();
+      return true;
     default:
       break;
     }
@@ -864,10 +862,24 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   case TargetOpcode::G_FPTOUI:
   case TargetOpcode::G_INTRINSIC_LRINT:
   case TargetOpcode::G_INTRINSIC_LLRINT:
+  case TargetOpcode::G_LROUND:
+  case TargetOpcode::G_LLROUND: {
     if (MRI.getType(MI.getOperand(0).getReg()).isVector())
       break;
-    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
+    TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
+    TypeSize SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, TRI);
+    if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
+        all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+               [&](const MachineInstr &UseMI) {
+                 return onlyUsesFP(UseMI, MRI, TRI) ||
+                        prefersFPUse(UseMI, MRI, TRI);
+               }))
+      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
+    else
+      OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
     break;
+  }
+
   case TargetOpcode::G_FCMP: {
     // If the result is a vector, it must use a FPR.
     AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
@@ -1143,6 +1155,34 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   case TargetOpcode::G_INTRINSIC:
   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
     switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
+    case Intrinsic::aarch64_neon_fcvtas:
+    case Intrinsic::aarch64_neon_fcvtau:
+    case Intrinsic::aarch64_neon_fcvtzs:
+    case Intrinsic::aarch64_neon_fcvtzu:
+    case Intrinsic::aarch64_neon_fcvtms:
+    case Intrinsic::aarch64_neon_fcvtmu:
+    case Intrinsic::aarch64_neon_fcvtns:
+    case Intrinsic::aarch64_neon_fcvtnu:
+    case Intrinsic::aarch64_neon_fcvtps:
+    case Intrinsic::aarch64_neon_fcvtpu: {
+      OpRegBankIdx[2] = PMI_FirstFPR;
+      if (MRI.getType(MI.getOperand(0).getReg()).isVector()) {
+        OpRegBankIdx[0] = PMI_FirstFPR;
+        break;
+      }
+      TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
+      TypeSize SrcSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, TRI);
+      if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
+          all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+                 [&](const MachineInstr &UseMI) {
+                   return onlyUsesFP(UseMI, MRI, TRI) ||
+                          prefersFPUse(UseMI, MRI, TRI);
+                 }))
+        OpRegBankIdx[0] = PMI_FirstFPR;
+      else
+        OpRegBankIdx[0] = PMI_FirstGPR;
+      break;
+    }
     case Intrinsic::aarch64_neon_vcvtfxs2fp:
     case Intrinsic::aarch64_neon_vcvtfxu2fp:
     case Intrinsic::aarch64_neon_vcvtfp2fxs:
@@ -1179,12 +1219,6 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     }
     break;
   }
-  case TargetOpcode::G_LROUND:
-  case TargetOpcode::G_LLROUND: {
-    // Source is always floating point and destination is always integer.
-    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
-    break;
-  }
   }
 
   // Finally construct the computed mapping.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
index b2528840a39cf..46dbc1556fb1d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
@@ -96,7 +96,7 @@ body:             |
     ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:fpr(s32) = G_SITOFP [[COPY1]](s32)
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr(s32) = COPY [[COPY2]](s32)
     ; CHECK-NEXT: [[SELECT:%[0-9]+]]:fpr(s32) = G_SELECT [[COPY2]](s32), [[COPY3]], [[SITOFP]]
-    ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:gpr(s32) = G_FPTOSI [[SELECT]](s32)
+    ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:fpr(s32) = G_FPTOSI [[SELECT]](s32)
     %0:_(s32) = COPY $w0
     %2:_(s32) = COPY $w1
     %3:_(s32) = COPY $w2
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-llround.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-llround.mir
index 420c7cfb07b74..16100f01017a6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-llround.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-llround.mir
@@ -14,7 +14,7 @@ body:             |
     ; CHECK: liveins: $d0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %fpr:fpr(s64) = COPY $d0
-    ; CHECK-NEXT: %llround:gpr(s64) = G_LLROUND %fpr(s64)
+    ; CHECK-NEXT: %llround:fpr(s64) = G_LLROUND %fpr(s64)
     ; CHECK-NEXT: $d0 = COPY %llround(s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $s0
     %fpr:_(s64) = COPY $d0
@@ -35,7 +35,7 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %gpr:gpr(s64) = COPY $x0
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY %gpr(s64)
-    ; CHECK-NEXT: %llround:gpr(s64) = G_LLROUND [[COPY]](s64)
+    ; CHECK-NEXT: %llround:fpr(s64) = G_LLROUND [[COPY]](s64)
     ; CHECK-NEXT: $d0 = COPY %llround(s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $s0
     %gpr:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-lround.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-lround.mir
index 775c6ca773c68..5cb93f7c4646d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-lround.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-lround.mir
@@ -14,7 +14,7 @@ body:             |
     ; CHECK: liveins: $d0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %fpr:fpr(s64) = COPY $d0
-    ; CHECK-NEXT: %lround:gpr(s64) = G_LROUND %fpr(s64)
+    ; CHECK-NEXT: %lround:fpr(s64) = G_LROUND %fpr(s64)
     ; CHECK-NEXT: $d0 = COPY %lround(s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $s0
     %fpr:_(s64) = COPY $d0
@@ -35,7 +35,7 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %gpr:gpr(s64) = COPY $x0
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY %gpr(s64)
-    ; CHECK-NEXT: %lround:gpr(s64) = G_LROUND [[COPY]](s64)
+    ; CHECK-NEXT: %lround:fpr(s64) = G_LROUND [[COPY]](s64)
     ; CHECK-NEXT: $d0 = COPY %lround(s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $s0
     %gpr:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
new file mode 100644
index 0000000000000..936bdfc164810
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
@@ -0,0 +1,1306 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1  | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI: warning: Instruction selection used fallback path for fptosi_i32_f16_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f16_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f32_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f64_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f64_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f32_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f16_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f16_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f32_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f64_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f64_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f32_simd
+
+;
+; FPTOI
+;
+
+define float @test_fptosi_f16_i32_simd(half %a)  {
+; CHECK-SD-LABEL: test_fptosi_f16_i32_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs s0, h0
+; CHECK-SD-NEXT:    ret
+  %r = fptosi half %a to i32
+  %bc = bitcast i32 %r to float
+  ret float %bc
+}
+
+define double @test_fptosi_f16_i64_simd(half %a)  {
+; CHECK-SD-LABEL: test_fptosi_f16_i64_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs d0, h0
+; CHECK-SD-NEXT:    ret
+  %r = fptosi half %a to i64
+  %bc = bitcast i64 %r to double
+  ret double %bc
+}
+
+define float @test_fptosi_f64_i32_simd(double %a)  {
+; CHECK-SD-LABEL: test_fptosi_f64_i32_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs s0, d0
+; CHECK-SD-NEXT:    ret
+  %r = fptosi double %a to i32
+  %bc = bitcast i32 %r to float
+  ret float %bc
+}
+
+define double @test_fptosi_f32_i64_simd(float %a)  {
+; CHECK-SD-LABEL: test_fptosi_f32_i64_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs d0, s0
+; CHECK-SD-NEXT:    ret
+  %r = fptosi float %a to i64
+  %bc = bitcast i64 %r to double
+  ret double %bc
+}
+
+define double @test_fptosi_f64_i64_simd(double %a)  {
+; CHECK-SD-LABEL: test_fptosi_f64_i64_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs d0, d0
+; CHECK-SD-NEXT:    ret
+  %r = fptosi double %a to i64
+  %bc = bitcast i64 %r to double
+  ret double %bc
+}
+
+
+define float @test_fptosi_f32_i32_simd(float %a)  {
+; CHECK-SD-LABEL: test_fptosi_f32_i32_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs s0, s0
+; CHECK-SD-NEXT:    ret
+  %r = fptosi float %a to i32
+  %bc = bitcast i32 %r to float
+  ret float %bc
+}
+
+define float @test_fptoui_f16_i32_simd(half %a)  {
+; CHECK-SD-LABEL: test_fptoui_f16_i32_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu s0, h0
+; CHECK-SD-NEXT:    ret
+  %r = fptoui half %a to i32
+  %bc = bitcast i32 %r to float
+  ret float %bc
+}
+
+define double @test_fptoui_f16_i64_simd(half %a)  {
+; CHECK-SD-LABEL: test_fptoui_f16_i64_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu d0, h0
+; CHECK-SD-NEXT:    ret
+  %r = fptoui half %a to i64
+  %bc = bitcast i64 %r to double
+  ret double %bc
+}
+
+define float @test_fptoui_f64_i32_simd(double %a)  {
+; CHECK-SD-LABEL: test_fptoui_f64_i32_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu s0, d0
+; CHECK-SD-NEXT:    ret
+  %r = fptoui double %a to i32
+  %bc = bitcast i32 %r to float
+  ret float %bc
+}
+
+define double @test_fptoui_f32_i64_simd(float %a)  {
+; CHECK-SD-LABEL: test_fptoui_f32_i64_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu d0, s0
+; CHECK-SD-NEXT:    ret
+  %r = fptoui float %a to i64
+  %bc = bitcast i64 %r to double
+  ret double %bc
+}
+
+define double @test_fptoui_f64_i64_simd(double %a)  {
+; CHECK-SD-LABEL: test_fptoui_f64_i64_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu d0, d0
+; CHECK-SD-NEXT:    ret
+  %r = fptoui double %a to i64
+  %bc = bitcast i64 %r to double
+  ret double %bc
+}
+
+
+define float @test_fptoui_f32_i32_simd(float %a)  {
+; CHECK-SD-LABEL: test_fptoui_f32_i32_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu s0, s0
+; CHECK-SD-NEXT:    ret
+  %r = fptoui float %a to i32
+  %bc = bitcast i32 %r to float
+  ret float %bc
+}
+
+
+;
+; FPTOI experimental
+;
+
+define float @fptosi_i32_f16_simd(half %x)  {
+; CHECK-SD-LABEL: fptosi_i32_f16_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs s0, h0
+; CHECK-SD-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict")
+  %sum = bitcast i32 %val to float
+  ret float %sum
+}
+
+define double @fptosi_i64_f16_simd(half %x)  {
+; CHECK-SD-LABEL: fptosi_i64_f16_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs d0, h0
+; CHECK-SD-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict")
+  %sum = bitcast i64 %val to double
+  ret double %sum
+}
+
+define double @fptosi_i64_f32_simd(float %x)  {
+; CHECK-SD-LABEL: fptosi_i64_f32_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs d0, s0
+; CHECK-SD-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict")
+  %bc = bitcast i64 %val to double
+  ret double %bc
+}
+
+define float @fptosi_i32_f64_simd(double %x)  {
+; CHECK-SD-LABEL: fptosi_i32_f64_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs s0, d0
+; CHECK-SD-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict")
+  %bc = bitcast i32 %val to float
+  ret float %bc
+}
+
+define double @fptosi_i64_f64_simd(double %x)  {
+; CHECK-SD-LABEL: fptosi_i64_f64_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs d0, d0
+; CHECK-SD-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict")
+  %bc = bitcast i64 %val to double
+  ret double %bc
+}
+
+define float @fptosi_i32_f32_simd(float %x)  {
+; CHECK-SD-LABEL: fptosi_i32_f32_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs s0, s0
+; CHECK-SD-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict")
+  %bc = bitcast i32 %val to float
+  ret float %bc
+}
+
+
+
+define float @fptoui_i32_f16_simd(half %x)  {
+; CHECK-SD-LABEL: fptoui_i32_f16_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu s0, h0
+; CHECK-SD-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict")
+  %sum = bitcast i32 %val to float
+  ret float %sum
+}
+
+define double @fptoui_i64_f16_simd(half %x)  {
+; CHECK-SD-LABEL: fptoui_i64_f16_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu d0, h0
+; CHECK-SD-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict")
+  %sum = bitcast i64 %val to double
+  ret double %sum
+}
+
+define double @fptoui_i64_f32_simd(float %x)  {
+; CHECK-SD-LABEL: fptoui_i64_f32_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu d0, s0
+; CHECK-SD-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict")
+  %bc = bitcast i64 %val to double
+  ret double %bc
+}
+
+define float @fptoui_i32_f64_simd(double %x)  {
+; CHECK-SD-LABEL: fptoui_i32_f64_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu s0, d0
+; CHECK-SD-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict")
+  %bc = bitcast i32 %val to float
+  ret float %bc
+}
+
+define double @fptoui_i64_f64_simd(double %x)  {
+; CHECK-SD-LABEL: fptoui_i64_f64_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu d0, d0
+; CHECK-SD-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict")
+  %bc = bitcast i64 %val to double
+  ret double %bc
+}
+
+define float @fptoui_i32_f32_simd(float %x)  {
+; CHECK-SD-LABEL: fptoui_i32_f32_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu s0, s0
+; CHECK-SD-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict")
+  %bc = bitcast i32 %val to float
+  ret float %bc
+}
+
+;
+; FPTOI rounding
+;
+
+
+define double @fcvtas_ds_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtas_ds_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtas d0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @roundf(float %a) nounwind readnone
+  %i = fptosi float %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtas_sd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtas_sd_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtas s0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @round(double %a) nounwind readnone
+  %i = fptosi double %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtas_ss_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtas_ss_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtas s0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @roundf(float %a) nounwind readnone
+  %i = fptosi float %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtas_dd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtas_dd_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtas d0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @round(double %a) nounwind readnone
+  %i = fptosi double %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+
+define double @fcvtau_ds_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtau_ds_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtau d0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @roundf(float %a) nounwind readnone
+  %i = fptoui float %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtau_sd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtau_sd_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtau s0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @round(double %a) nounwind readnone
+  %i = fptoui double %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtau_ss_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtau_ss_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtas s0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @roundf(float %a) nounwind readnone
+  %i = fptosi float %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtau_dd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtau_dd_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtas d0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @round(double %a) nounwind readnone
+  %i = fptosi double %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+
+define double @fcvtms_ds_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtms_ds_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtms d0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @floorf(float %a) nounwind readnone
+  %i = fptosi float %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtms_sd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtms_sd_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtms s0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @floor(double %a) nounwind readnone
+  %i = fptosi double %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtms_ss_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtms_ss_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtms s0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @floorf(float %a) nounwind readnone
+  %i = fptosi float %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtms_dd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtms_dd_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtms d0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @floor(double %a) nounwind readnone
+  %i = fptosi double %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+
+
+define double @fcvtmu_ds_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtmu_ds_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtmu d0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @floorf(float %a) nounwind readnone
+  %i = fptoui float %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtmu_sd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtmu_sd_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtmu s0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @floor(double %a) nounwind readnone
+  %i = fptoui double %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtmu_ss_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtmu_ss_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtms s0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @floorf(float %a) nounwind readnone
+  %i = fptosi float %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtmu_dd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtmu_dd_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtms d0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @floor(double %a) nounwind readnone
+  %i = fptosi double %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+
+define double @fcvtps_ds_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtps_ds_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtps d0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @ceilf(float %a) nounwind readnone
+  %i = fptosi float %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtps_sd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtps_sd_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtps s0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @ceil(double %a) nounwind readnone
+  %i = fptosi double %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtps_ss_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtps_ss_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtps s0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @ceilf(float %a) nounwind readnone
+  %i = fptosi float %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtps_dd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtps_dd_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtps d0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @ceil(double %a) nounwind readnone
+  %i = fptosi double %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+
+define double @fcvtpu_ds_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtpu_ds_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtpu d0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @ceilf(float %a) nounwind readnone
+  %i = fptoui float %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtpu_sd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtpu_sd_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtpu s0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @ceil(double %a) nounwind readnone
+  %i = fptoui double %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtpu_ss_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtpu_ss_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtps s0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @ceilf(float %a) nounwind readnone
+  %i = fptosi float %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtpu_dd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtpu_dd_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtps d0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @ceil(double %a) nounwind readnone
+  %i = fptosi double %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+
+define double @fcvtzs_ds_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzs_ds_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs d0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @truncf(float %a) nounwind readnone
+  %i = fptosi float %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtzs_sd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzs_sd_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs s0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @trunc(double %a) nounwind readnone
+  %i = fptosi double %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtzs_ss_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzs_ss_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs s0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @truncf(float %a) nounwind readnone
+  %i = fptosi float %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzs_dd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzs_dd_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs d0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @trunc(double %a) nounwind readnone
+  %i = fptosi double %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtzu_ds_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzu_ds_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu d0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @truncf(float %a) nounwind readnone
+  %i = fptoui float %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtzu_sd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzu_sd_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu s0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @trunc(double %a) nounwind readnone
+  %i = fptoui double %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtzu_ss_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzu_ss_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs s0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @truncf(float %a) nounwind readnone
+  %i = fptosi float %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzu_dd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzu_dd_round_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs d0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @trunc(double %a) nounwind readnone
+  %i = fptosi double %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+
+;
+; FPTOI saturating
+;
+
+define float @fcvtzs_sh_sat_simd(half %a) {
+; CHECK-SD-LABEL: fcvtzs_sh_sat_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs s0, h0
+; CHECK-SD-NEXT:    ret
+  %i = call i32 @llvm.fptosi.sat.i32.f16(half %a)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzs_dh_sat_simd(half %a) {
+; CHECK-SD-LABEL: fcvtzs_dh_sat_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs d0, h0
+; CHECK-SD-NEXT:    ret
+  %i = call i64 @llvm.fptosi.sat.i64.f16(half %a)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtzs_ds_sat_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzs_ds_sat_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs d0, s0
+; CHECK-SD-NEXT:    ret
+  %i = call i64 @llvm.fptosi.sat.i64.f32(float %a)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtzs_sd_sat_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzs_sd_sat_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs s0, d0
+; CHECK-SD-NEXT:    ret
+  %i = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtzs_ss_sat_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzs_ss_sat_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs s0, s0
+; CHECK-SD-NEXT:    ret
+  %i = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzs_dd_sat_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzs_dd_sat_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs d0, d0
+; CHECK-SD-NEXT:    ret
+  %i = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtzu_sh_sat_simd(half %a) {
+; CHECK-SD-LABEL: fcvtzu_sh_sat_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu s0, h0
+; CHECK-SD-NEXT:    ret
+  %i = call i32 @llvm.fptoui.sat.i32.f16(half %a)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzu_dh_sat_simd(half %a) {
+; CHECK-SD-LABEL: fcvtzu_dh_sat_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu d0, h0
+; CHECK-SD-NEXT:    ret
+  %i = call i64 @llvm.fptoui.sat.i64.f16(half %a)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtzu_ds_sat_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzu_ds_sat_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu d0, s0
+; CHECK-SD-NEXT:    ret
+  %i = call i64 @llvm.fptoui.sat.i64.f32(float %a)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtzu_sd_sat_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzu_sd_sat_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu s0, d0
+; CHECK-SD-NEXT:    ret
+  %i = call i32 @llvm.fptoui.sat.i32.f64(double %a)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtzu_ss_sat_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzu_ss_sat_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs s0, s0
+; CHECK-SD-NEXT:    ret
+  %i = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzu_dd_sat_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzu_dd_sat_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs d0, d0
+; CHECK-SD-NEXT:    ret
+  %i = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+;
+; FPTOI saturating with rounding
+;
+
+define float @fcvtas_sh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtas_sh_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtas s0, h0
+; CHECK-SD-NEXT:    ret
+  %r = call half @llvm.round.f16(half %a) nounwind readnone
+  %i = call i32 @llvm.fptosi.sat.i32.f16(half %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtas_dh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtas_dh_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtas d0, h0
+; CHECK-SD-NEXT:    ret
+  %r = call half @llvm.round.f16(half %a) nounwind readnone
+  %i = call i64 @llvm.fptosi.sat.i64.f16(half %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtas_ds_simd(float %a) {
+; CHECK-SD-LABEL: fcvtas_ds_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtas d0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @roundf(float %a) nounwind readnone
+  %i = call i64 @llvm.fptosi.sat.i64.f32(float %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtas_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtas_sd_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtas s0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @round(double %a) nounwind readnone
+  %i = call i32 @llvm.fptosi.sat.i32.f64(double %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtas_ss_simd(float %a) {
+; CHECK-SD-LABEL: fcvtas_ss_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtas s0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @roundf(float %a) nounwind readnone
+  %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtas_dd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtas_dd_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtas d0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @round(double %a) nounwind readnone
+  %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtau_sh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtau_sh_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtau s0, h0
+; CHECK-SD-NEXT:    ret
+  %r = call half @llvm.round.f16(half %a) nounwind readnone
+  %i = call i32 @llvm.fptoui.sat.i32.f16(half %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtau_dh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtau_dh_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtau d0, h0
+; CHECK-SD-NEXT:    ret
+  %r = call half @llvm.round.f16(half %a) nounwind readnone
+  %i = call i64 @llvm.fptoui.sat.i64.f16(half %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtau_ds_simd(float %a) {
+; CHECK-SD-LABEL: fcvtau_ds_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtau d0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @roundf(float %a) nounwind readnone
+  %i = call i64 @llvm.fptoui.sat.i64.f32(float %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtau_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtau_sd_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtau s0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @round(double %a) nounwind readnone
+  %i = call i32 @llvm.fptoui.sat.i32.f64(double %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtau_ss_simd(float %a) {
+; CHECK-SD-LABEL: fcvtau_ss_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtas s0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @roundf(float %a) nounwind readnone
+  %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtau_dd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtau_dd_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtas d0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @round(double %a) nounwind readnone
+  %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtms_sh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtms_sh_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtms s0, h0
+; CHECK-SD-NEXT:    ret
+  %r = call half @llvm.floor.f16(half %a) nounwind readnone
+  %i = call i32 @llvm.fptosi.sat.i32.f16(half %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtms_dh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtms_dh_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtms d0, h0
+; CHECK-SD-NEXT:    ret
+  %r = call half @llvm.floor.f16(half %a) nounwind readnone
+  %i = call i64 @llvm.fptosi.sat.i64.f16(half %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtms_ds_simd(float %a) {
+; CHECK-SD-LABEL: fcvtms_ds_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtms d0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @floorf(float %a) nounwind readnone
+  %i = call i64 @llvm.fptosi.sat.i64.f32(float %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtms_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtms_sd_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtms s0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @floor(double %a) nounwind readnone
+  %i = call i32 @llvm.fptosi.sat.i32.f64(double %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtms_ss_simd(float %a) {
+; CHECK-SD-LABEL: fcvtms_ss_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtms s0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @floorf(float %a) nounwind readnone
+  %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtms_dd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtms_dd_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtms d0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @floor(double %a) nounwind readnone
+  %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtmu_sh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtmu_sh_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtmu s0, h0
+; CHECK-SD-NEXT:    ret
+  %r = call half @llvm.floor.f16(half %a) nounwind readnone
+  %i = call i32 @llvm.fptoui.sat.i32.f16(half %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtmu_dh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtmu_dh_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtmu d0, h0
+; CHECK-SD-NEXT:    ret
+  %r = call half @llvm.floor.f16(half %a) nounwind readnone
+  %i = call i64 @llvm.fptoui.sat.i64.f16(half %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtmu_ds_simd(float %a) {
+; CHECK-SD-LABEL: fcvtmu_ds_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtmu d0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @floorf(float %a) nounwind readnone
+  %i = call i64 @llvm.fptoui.sat.i64.f32(float %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtmu_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtmu_sd_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtmu s0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @floor(double %a) nounwind readnone
+  %i = call i32 @llvm.fptoui.sat.i32.f64(double %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtmu_ss_simd(float %a) {
+; CHECK-SD-LABEL: fcvtmu_ss_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtms s0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @floorf(float %a) nounwind readnone
+  %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtmu_dd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtmu_dd_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtms d0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @floor(double %a) nounwind readnone
+  %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtps_sh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtps_sh_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtps s0, h0
+; CHECK-SD-NEXT:    ret
+  %r = call half @llvm.ceil.f16(half %a) nounwind readnone
+  %i = call i32 @llvm.fptosi.sat.i32.f16(half %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtps_dh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtps_dh_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtps d0, h0
+; CHECK-SD-NEXT:    ret
+  %r = call half @llvm.ceil.f16(half %a) nounwind readnone
+  %i = call i64 @llvm.fptosi.sat.i64.f16(half %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtps_ds_simd(float %a) {
+; CHECK-SD-LABEL: fcvtps_ds_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtps d0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @ceilf(float %a) nounwind readnone
+  %i = call i64 @llvm.fptosi.sat.i64.f32(float %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtps_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtps_sd_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtps s0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @ceil(double %a) nounwind readnone
+  %i = call i32 @llvm.fptosi.sat.i32.f64(double %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtps_ss_simd(float %a) {
+; CHECK-SD-LABEL: fcvtps_ss_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtps s0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @ceilf(float %a) nounwind readnone
+  %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtps_dd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtps_dd_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtps d0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @ceil(double %a) nounwind readnone
+  %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtpu_sh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtpu_sh_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtpu s0, h0
+; CHECK-SD-NEXT:    ret
+  %r = call half @llvm.ceil.f16(half %a) nounwind readnone
+  %i = call i32 @llvm.fptoui.sat.i32.f16(half %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtpu_dh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtpu_dh_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtpu d0, h0
+; CHECK-SD-NEXT:    ret
+  %r = call half @llvm.ceil.f16(half %a) nounwind readnone
+  %i = call i64 @llvm.fptoui.sat.i64.f16(half %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtpu_ds_simd(float %a) {
+; CHECK-SD-LABEL: fcvtpu_ds_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtpu d0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @ceilf(float %a) nounwind readnone
+  %i = call i64 @llvm.fptoui.sat.i64.f32(float %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtpu_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtpu_sd_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtpu s0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @ceil(double %a) nounwind readnone
+  %i = call i32 @llvm.fptoui.sat.i32.f64(double %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+; Unsigned ceil-convert: ceil + fptoui.sat folds to a single fcvtpu.
+; (Previously this test duplicated the signed fcvtps_ss_simd body by mistake.)
+define float @fcvtpu_ss_simd(float %a) {
+; CHECK-SD-LABEL: fcvtpu_ss_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtpu s0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @ceilf(float %a) nounwind readnone
+  %i = call i32 @llvm.fptoui.sat.i32.f32(float %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+; Unsigned ceil-convert: ceil + fptoui.sat folds to a single fcvtpu.
+; (Previously this test duplicated the signed fcvtps_dd_simd body by mistake.)
+define double @fcvtpu_dd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtpu_dd_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtpu d0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @ceil(double %a) nounwind readnone
+  %i = call i64 @llvm.fptoui.sat.i64.f64(double %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtzs_sh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtzs_sh_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs s0, h0
+; CHECK-SD-NEXT:    ret
+  %r = call half @llvm.trunc.f16(half %a) nounwind readnone
+  %i = call i32 @llvm.fptosi.sat.i32.f16(half %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzs_dh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtzs_dh_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs d0, h0
+; CHECK-SD-NEXT:    ret
+  %r = call half @llvm.trunc.f16(half %a) nounwind readnone
+  %i = call i64 @llvm.fptosi.sat.i64.f16(half %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtzs_ds_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzs_ds_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs d0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @truncf(float %a) nounwind readnone
+  %i = call i64 @llvm.fptosi.sat.i64.f32(float %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtzs_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzs_sd_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs s0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @trunc(double %a) nounwind readnone
+  %i = call i32 @llvm.fptosi.sat.i32.f64(double %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtzs_ss_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzs_ss_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs s0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @truncf(float %a) nounwind readnone
+  %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzs_dd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzs_dd_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs d0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @trunc(double %a) nounwind readnone
+  %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtzu_sh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtzu_sh_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu s0, h0
+; CHECK-SD-NEXT:    ret
+  %r = call half @llvm.trunc.f16(half %a) nounwind readnone
+  %i = call i32 @llvm.fptoui.sat.i32.f16(half %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzu_dh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtzu_dh_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu d0, h0
+; CHECK-SD-NEXT:    ret
+  %r = call half @llvm.trunc.f16(half %a) nounwind readnone
+  %i = call i64 @llvm.fptoui.sat.i64.f16(half %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtzu_ds_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzu_ds_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu d0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @truncf(float %a) nounwind readnone
+  %i = call i64 @llvm.fptoui.sat.i64.f32(float %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtzu_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzu_sd_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu s0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @trunc(double %a) nounwind readnone
+  %i = call i32 @llvm.fptoui.sat.i32.f64(double %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtzu_ss_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzu_ss_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu s0, s0
+; CHECK-SD-NEXT:    ret
+  %r = call float @truncf(float %a) nounwind readnone
+  %i = call i32 @llvm.fptoui.sat.i32.f32(float %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzu_dd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzu_dd_simd:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu d0, d0
+; CHECK-SD-NEXT:    ret
+  %r = call double @trunc(double %a) nounwind readnone
+  %i = call i64 @llvm.fptoui.sat.i64.f64(double %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+declare half @llvm.floor.f16(half) nounwind readnone
+declare half @llvm.ceil.f16(half) nounwind readnone
+declare half @llvm.trunc.f16(half) nounwind readnone
+declare half @llvm.round.f16(half) nounwind readnone
+declare float @floorf(float) nounwind readnone
+declare float @ceilf(float) nounwind readnone
+declare float @truncf(float) nounwind readnone
+declare float @roundf(float) nounwind readnone
+declare double @floor(double) nounwind readnone
+declare double @ceil(double) nounwind readnone
+declare double @trunc(double) nounwind readnone
+declare double @round(double) nounwind readnone
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
+; CHECK-GI: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
new file mode 100644
index 0000000000000..ae4f83a5bd261
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
@@ -0,0 +1,612 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+
+;
+; Intrinsics
+;
+
+define float @fcvtas_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtas_1s1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, d0
+; CHECK-NEXT:    ret
+  %i = call i32 @llvm.aarch64.neon.fcvtas.i32.f64(double %A)
+  %f = bitcast i32 %i to float
+  ret float %f
+}
+
+define double @fcvtas_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtas_1d1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtas.i64.f32(float %A)
+  %d = bitcast i64 %i to double
+  ret double %d
+}
+
+define dso_local float @fcvtas_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtas_1s1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, h0
+; CHECK-NEXT:    ret
+  %fcvt = tail call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a)
+  %f = bitcast i32 %fcvt to float
+  ret float %f
+}
+
+define dso_local double @fcvtas_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtas_1d1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f16(half %a)
+  %d = bitcast i64 %vcvtah_s64_f16 to double
+  ret double %d
+}
+
+define dso_local double @fcvtas_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtas_1d1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double %a)
+  %d = bitcast i64 %vcvtah_s64_f64 to double
+  ret double %d
+}
+
+define dso_local float @fcvtas_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtas_1s1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, s0
+; CHECK-NEXT:    ret
+  %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtas.i32.f32(float %a)
+  %d = bitcast i32 %vcvtah_s32_f32 to float
+  ret float %d
+}
+
+
+define float @fcvtau_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtau_1s1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau s0, d0
+; CHECK-NEXT:    ret
+  %i = call i32 @llvm.aarch64.neon.fcvtau.i32.f64(double %A)
+  %f = bitcast i32 %i to float
+  ret float %f
+}
+
+define double @fcvtau_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtau_1d1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtau.i64.f32(float %A)
+  %d = bitcast i64 %i to double
+  ret double %d
+}
+
+define dso_local float @fcvtau_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtau_1s1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau s0, h0
+; CHECK-NEXT:    ret
+  %fcvt = tail call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a)
+  %f = bitcast i32 %fcvt to float
+  ret float %f
+}
+
+define dso_local double @fcvtau_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtau_1d1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f16(half %a)
+  %d = bitcast i64 %vcvtah_s64_f16 to double
+  ret double %d
+}
+
+define dso_local double @fcvtau_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtau_1d1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double %a)
+  %d = bitcast i64 %vcvtah_s64_f64 to double
+  ret double %d
+}
+
+define dso_local float @fcvtau_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtau_1s1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau s0, s0
+; CHECK-NEXT:    ret
+  %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtau.i32.f32(float %a)
+  %d = bitcast i32 %vcvtah_s32_f32 to float
+  ret float %d
+}
+
+define float @fcvtms_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtms_1s1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms s0, d0
+; CHECK-NEXT:    ret
+  %i = call i32 @llvm.aarch64.neon.fcvtms.i32.f64(double %A)
+  %f = bitcast i32 %i to float
+  ret float %f
+}
+
+define double @fcvtms_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtms_1d1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtms.i64.f32(float %A)
+  %d = bitcast i64 %i to double
+  ret double %d
+}
+
+define dso_local float @fcvtms_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtms_1s1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms s0, h0
+; CHECK-NEXT:    ret
+  %fcvt = tail call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a)
+  %f = bitcast i32 %fcvt to float
+  ret float %f
+}
+
+define dso_local double @fcvtms_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtms_1d1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a)
+  %d = bitcast i64 %vcvtah_s64_f16 to double
+  ret double %d
+}
+
+define dso_local double @fcvtms_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtms_1d1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double %a)
+  %d = bitcast i64 %vcvtah_s64_f64 to double
+  ret double %d
+}
+
+define dso_local float @fcvtms_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtms_1s1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms s0, s0
+; CHECK-NEXT:    ret
+  %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtms.i32.f32(float %a)
+  %d = bitcast i32 %vcvtah_s32_f32 to float
+  ret float %d
+}
+
+define float @fcvtmu_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtmu_1s1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu s0, d0
+; CHECK-NEXT:    ret
+  %i = call i32 @llvm.aarch64.neon.fcvtmu.i32.f64(double %A)
+  %f = bitcast i32 %i to float
+  ret float %f
+}
+
+define double @fcvtmu_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtmu_1d1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtmu.i64.f32(float %A)
+  %d = bitcast i64 %i to double
+  ret double %d
+}
+
+define dso_local float @fcvtmu_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtmu_1s1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu s0, h0
+; CHECK-NEXT:    ret
+  %fcvt = tail call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a)
+  %f = bitcast i32 %fcvt to float
+  ret float %f
+}
+
+define dso_local double @fcvtmu_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtmu_1d1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half %a)
+  %d = bitcast i64 %vcvtah_s64_f16 to double
+  ret double %d
+}
+
+define dso_local double @fcvtmu_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtmu_1d1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double %a)
+  %d = bitcast i64 %vcvtah_s64_f64 to double
+  ret double %d
+}
+
+define dso_local float @fcvtmu_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtmu_1s1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu s0, s0
+; CHECK-NEXT:    ret
+  %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtmu.i32.f32(float %a)
+  %d = bitcast i32 %vcvtah_s32_f32 to float
+  ret float %d
+}
+
+define float @fcvtns_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtns_1s1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtns s0, d0
+; CHECK-NEXT:    ret
+  %i = call i32 @llvm.aarch64.neon.fcvtns.i32.f64(double %A)
+  %f = bitcast i32 %i to float
+  ret float %f
+}
+
+define double @fcvtns_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtns_1d1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtns d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtns.i64.f32(float %A)
+  %d = bitcast i64 %i to double
+  ret double %d
+}
+
+define dso_local float @fcvtns_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtns_1s1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtns s0, h0
+; CHECK-NEXT:    ret
+  %fcvt = tail call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a)
+  %f = bitcast i32 %fcvt to float
+  ret float %f
+}
+
+define dso_local double @fcvtns_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtns_1d1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtns d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f16(half %a)
+  %d = bitcast i64 %vcvtah_s64_f16 to double
+  ret double %d
+}
+
+define dso_local double @fcvtns_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtns_1d1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtns d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double %a)
+  %d = bitcast i64 %vcvtah_s64_f64 to double
+  ret double %d
+}
+
+define dso_local float @fcvtns_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtns_1s1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtns s0, s0
+; CHECK-NEXT:    ret
+  %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtns.i32.f32(float %a)
+  %d = bitcast i32 %vcvtah_s32_f32 to float
+  ret float %d
+}
+
+define float @fcvtnu_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtnu_1s1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnu s0, d0
+; CHECK-NEXT:    ret
+  %i = call i32 @llvm.aarch64.neon.fcvtnu.i32.f64(double %A)
+  %f = bitcast i32 %i to float
+  ret float %f
+}
+
+define double @fcvtnu_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtnu_1d1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnu d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtnu.i64.f32(float %A)
+  %d = bitcast i64 %i to double
+  ret double %d
+}
+
+define dso_local float @fcvtnu_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtnu_1s1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnu s0, h0
+; CHECK-NEXT:    ret
+  %fcvt = tail call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a)
+  %f = bitcast i32 %fcvt to float
+  ret float %f
+}
+
+define dso_local double @fcvtnu_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtnu_1d1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnu d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half %a)
+  %d = bitcast i64 %vcvtah_s64_f16 to double
+  ret double %d
+}
+
+define dso_local double @fcvtnu_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtnu_1d1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnu d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double %a)
+  %d = bitcast i64 %vcvtah_s64_f64 to double
+  ret double %d
+}
+
+define dso_local float @fcvtnu_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtnu_1s1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnu s0, s0
+; CHECK-NEXT:    ret
+  %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtnu.i32.f32(float %a)
+  %d = bitcast i32 %vcvtah_s32_f32 to float
+  ret float %d
+}
+
+define float @fcvtps_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtps_1s1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps s0, d0
+; CHECK-NEXT:    ret
+  %i = call i32 @llvm.aarch64.neon.fcvtps.i32.f64(double %A)
+  %f = bitcast i32 %i to float
+  ret float %f
+}
+
+define double @fcvtps_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtps_1d1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtps.i64.f32(float %A)
+  %d = bitcast i64 %i to double
+  ret double %d
+}
+
+define dso_local float @fcvtps_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtps_1s1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps s0, h0
+; CHECK-NEXT:    ret
+  %fcvt = tail call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a)
+  %f = bitcast i32 %fcvt to float
+  ret float %f
+}
+
+define dso_local double @fcvtps_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtps_1d1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f16(half %a)
+  %d = bitcast i64 %vcvtah_s64_f16 to double
+  ret double %d
+}
+
+define dso_local double @fcvtps_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtps_1d1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double %a)
+  %d = bitcast i64 %vcvtah_s64_f64 to double
+  ret double %d
+}
+
+define dso_local float @fcvtps_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtps_1s1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps s0, s0
+; CHECK-NEXT:    ret
+  %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtps.i32.f32(float %a)
+  %d = bitcast i32 %vcvtah_s32_f32 to float
+  ret float %d
+}
+
+define float @fcvtpu_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtpu_1s1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu s0, d0
+; CHECK-NEXT:    ret
+  %i = call i32 @llvm.aarch64.neon.fcvtpu.i32.f64(double %A)
+  %f = bitcast i32 %i to float
+  ret float %f
+}
+
+define double @fcvtpu_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtpu_1d1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtpu.i64.f32(float %A)
+  %d = bitcast i64 %i to double
+  ret double %d
+}
+
+define dso_local float @fcvtpu_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtpu_1s1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu s0, h0
+; CHECK-NEXT:    ret
+  %fcvt = tail call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a)
+  %f = bitcast i32 %fcvt to float
+  ret float %f
+}
+
+define dso_local double @fcvtpu_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtpu_1d1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half %a)
+  %d = bitcast i64 %vcvtah_s64_f16 to double
+  ret double %d
+}
+
+define dso_local double @fcvtpu_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtpu_1d1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double %a)
+  %d = bitcast i64 %vcvtah_s64_f64 to double
+  ret double %d
+}
+
+define dso_local float @fcvtpu_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtpu_1s1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu s0, s0
+; CHECK-NEXT:    ret
+  %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtpu.i32.f32(float %a)
+  %d = bitcast i32 %vcvtah_s32_f32 to float
+  ret float %d
+}
+
+define float @fcvtzs_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtzs_1s1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, d0
+; CHECK-NEXT:    ret
+  %i = call i32 @llvm.aarch64.neon.fcvtzs.i32.f64(double %A)
+  %f = bitcast i32 %i to float
+  ret float %f
+}
+
+define double @fcvtzs_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzs_1d1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %A)
+  %d = bitcast i64 %i to double
+  ret double %d
+}
+
+define dso_local float @fcvtzs_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtzs_1s1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, h0
+; CHECK-NEXT:    ret
+  %fcvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
+  %f = bitcast i32 %fcvt to float
+  ret float %f
+}
+
+define dso_local double @fcvtzs_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtzs_1d1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a)
+  %d = bitcast i64 %vcvtah_s64_f16 to double
+  ret double %d
+}
+
+define dso_local double @fcvtzs_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtzs_1d1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double %a)
+  %d = bitcast i64 %vcvtah_s64_f64 to double
+  ret double %d
+}
+
+define dso_local float @fcvtzs_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtzs_1s1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
+  %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+  %d = bitcast i32 %vcvtah_s32_f32 to float
+  ret float %d
+}
+
+define float @fcvtzu_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtzu_1s1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, d0
+; CHECK-NEXT:    ret
+  %i = call i32 @llvm.aarch64.neon.fcvtzu.i32.f64(double %A)
+  %f = bitcast i32 %i to float
+  ret float %f
+}
+
+define double @fcvtzu_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzu_1d1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float %A)
+  %d = bitcast i64 %i to double
+  ret double %d
+}
+
+define dso_local float @fcvtzu_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtzu_1s1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, h0
+; CHECK-NEXT:    ret
+  %fcvt = tail call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
+  %f = bitcast i32 %fcvt to float
+  ret float %f
+}
+
+define dso_local double @fcvtzu_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtzu_1d1h_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half %a)
+  %d = bitcast i64 %vcvtah_s64_f16 to double
+  ret double %d
+}
+
+define dso_local double @fcvtzu_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtzu_1d1d_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double %a)
+  %d = bitcast i64 %vcvtah_s64_f64 to double
+  ret double %d
+}
+
+define dso_local float @fcvtzu_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtzu_1s1s_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, s0
+; CHECK-NEXT:    ret
+  %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtzu.i32.f32(float %a)
+  %d = bitcast i32 %vcvtah_s32_f32 to float
+  ret float %d
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-GI: {{.*}}
+; CHECK-SD: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-round-rint.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-round-rint.ll
new file mode 100644
index 0000000000000..000ff64131ccf
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-round-rint.ll
@@ -0,0 +1,428 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+;  CHECK-GI: warning: Instruction selection used fallback path for lround_i32_f16_simd
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_i64_f16_simd
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_i32_f64_simd
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_i32_f32_simd
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for llround_i64_f16_simd
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_i32_f16_simd_exp
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_i64_f16_simd_exp
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_i64_f32_simd_exp
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_i32_f64_simd_exp
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_i32_f32_simd_exp
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_i64_f64_simd_exp
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for llround_i64_f16_simd_exp
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for llround_i64_f32_simd_exp
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for llround_i64_f64_simd_exp
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_i32_f16_simd
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_i32_f64_simd
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_i32_f32_simd
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_i32_f16_simd_exp
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_i64_f16_simd_exp
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_i64_f32_simd_exp
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_i32_f64_simd_exp
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_i32_f32_simd_exp
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_i64_f64_simd_exp
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for llrint_i64_f16_simd_exp
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for llrint_i64_f32_simd_exp
+;  CHECK-GI-NEXT: warning: Instruction selection used fallback path for llrint_i64_f64_simd_exp
+
+;
+; (L/LL)Round
+;
+
+define float @lround_i32_f16_simd(half %x)  {
+; CHECK-LABEL: lround_i32_f16_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, h0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.lround.i32.f16(half %x)
+  %sum = bitcast i32 %val to float
+  ret float %sum
+}
+
+define double @lround_i64_f16_simd(half %x)  {
+; CHECK-LABEL: lround_i64_f16_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, h0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.lround.i64.f16(half %x)
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+
+define double @lround_i64_f32_simd(float %x)  {
+; CHECK-LABEL: lround_i64_f32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, s0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.lround.i64.f32(float %x)
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+
+define float @lround_i32_f64_simd(double %x)  {
+; CHECK-LABEL: lround_i32_f64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, d0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.lround.i32.f64(double %x)
+  %bc  = bitcast i32 %val to float
+  ret float %bc
+}
+
+define float @lround_i32_f32_simd(float %x)  {
+; CHECK-LABEL: lround_i32_f32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, s0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.lround.i32.f32(float %x)
+  %bc  = bitcast i32 %val to float
+  ret float %bc
+}
+
+define double @lround_i64_f64_simd(double %x)  {
+; CHECK-LABEL: lround_i64_f64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, d0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.lround.i64.f64(double %x)
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+
+define double @llround_i64_f16_simd(half %x)  {
+; CHECK-LABEL: llround_i64_f16_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, h0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.llround.i64.f16(half %x)
+  %sum = bitcast i64 %val to double
+  ret double %sum
+}
+
+define double @llround_i64_f32_simd(float %x)  {
+; CHECK-LABEL: llround_i64_f32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, s0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.llround.i64.f32(float %x)
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+
+define double @llround_i64_f64_simd(double %x)  {
+; CHECK-LABEL: llround_i64_f64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, d0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.llround.i64.f64(double %x)
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+
+
+;
+; (L/LL)Round experimental
+;
+
+define float @lround_i32_f16_simd_exp(half %x)  {
+; CHECK-LABEL: lround_i32_f16_simd_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, h0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.lround.i32.f16(half %x, metadata !"fpexcept.strict")
+  %sum = bitcast i32 %val to float
+  ret float %sum
+}
+
+define double @lround_i64_f16_simd_exp(half %x)  {
+; CHECK-LABEL: lround_i64_f16_simd_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, h0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.lround.i64.f16(half %x, metadata !"fpexcept.strict")
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+
+define double @lround_i64_f32_simd_exp(float %x)  {
+; CHECK-LABEL: lround_i64_f32_simd_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, s0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.lround.i64.f32(float %x, metadata !"fpexcept.strict")
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+
+define float @lround_i32_f64_simd_exp(double %x)  {
+; CHECK-LABEL: lround_i32_f64_simd_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, d0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.lround.i32.f64(double %x, metadata !"fpexcept.strict")
+  %bc  = bitcast i32 %val to float
+  ret float %bc
+}
+
+define float @lround_i32_f32_simd_exp(float %x)  {
+; CHECK-LABEL: lround_i32_f32_simd_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, s0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.lround.i32.f32(float %x, metadata !"fpexcept.strict")
+  %bc  = bitcast i32 %val to float
+  ret float %bc
+}
+
+define double @lround_i64_f64_simd_exp(double %x)  {
+; CHECK-LABEL: lround_i64_f64_simd_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, d0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.lround.i64.f64(double %x, metadata !"fpexcept.strict")
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+
+define double @llround_i64_f16_simd_exp(half %x)  {
+; CHECK-LABEL: llround_i64_f16_simd_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, h0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.llround.i64.f16(half %x, metadata !"fpexcept.strict")
+  %sum = bitcast i64 %val to double
+  ret double %sum
+}
+
+define double @llround_i64_f32_simd_exp(float %x)  {
+; CHECK-LABEL: llround_i64_f32_simd_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, s0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.llround.i64.f32(float %x, metadata !"fpexcept.strict")
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+
+define double @llround_i64_f64_simd_exp(double %x)  {
+; CHECK-LABEL: llround_i64_f64_simd_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, d0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.llround.i64.f64(double %x, metadata !"fpexcept.strict")
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+
+;
+; (L/LL)Rint
+;
+
+define float @lrint_i32_f16_simd(half %x)  {
+; CHECK-LABEL: lrint_i32_f16_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx h0, h0
+; CHECK-NEXT:    fcvtzs s0, h0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.lrint.i32.f16(half %x)
+  %sum = bitcast i32 %val to float
+  ret float %sum
+}
+
+define double @lrint_i64_f16_simd(half %x)  {
+; CHECK-LABEL: lrint_i64_f16_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx h0, h0
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.lrint.i53.f16(half %x)
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+
+define double @lrint_i64_f32_simd(float %x)  {
+; CHECK-LABEL: lrint_i64_f32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx s0, s0
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.lrint.i64.f32(float %x)
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+
+define float @lrint_i32_f64_simd(double %x)  {
+; CHECK-LABEL: lrint_i32_f64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx d0, d0
+; CHECK-NEXT:    fcvtzs s0, d0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.lrint.i32.f64(double %x)
+  %bc  = bitcast i32 %val to float
+  ret float %bc
+}
+
+define float @lrint_i32_f32_simd(float %x)  {
+; CHECK-LABEL: lrint_i32_f32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx s0, s0
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.lrint.i32.f32(float %x)
+  %bc  = bitcast i32 %val to float
+  ret float %bc
+}
+
+define double @lrint_i64_f64_simd(double %x)  {
+; CHECK-LABEL: lrint_i64_f64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx d0, d0
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.lrint.i64.f64(double %x)
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+
+define double @llrint_i64_f16_simd(half %x)  {
+; CHECK-LABEL: llrint_i64_f16_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx h0, h0
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.llrint.i64.f16(half %x)
+  %sum = bitcast i64 %val to double
+  ret double %sum
+}
+
+define double @llrint_i64_f32_simd(float %x)  {
+; CHECK-LABEL: llrint_i64_f32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx s0, s0
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.llrint.i64.f32(float %x)
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+
+define double @llrint_i64_f64_simd(double %x)  {
+; CHECK-LABEL: llrint_i64_f64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx d0, d0
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.llrint.i64.f64(double %x)
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+
+;
+; (L/LL)Rint experimental
+;
+
+define float @lrint_i32_f16_simd_exp(half %x)  {
+; CHECK-LABEL: lrint_i32_f16_simd_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx h0, h0
+; CHECK-NEXT:    fcvtzs s0, h0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.lrint.i32.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  %sum = bitcast i32 %val to float
+  ret float %sum
+}
+
+define double @lrint_i64_f16_simd_exp(half %x)  {
+; CHECK-LABEL: lrint_i64_f16_simd_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx h0, h0
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.lrint.i64.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+
+define double @lrint_i64_f32_simd_exp(float %x)  {
+; CHECK-LABEL: lrint_i64_f32_simd_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx s0, s0
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.lrint.i64.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+
+define float @lrint_i32_f64_simd_exp(double %x)  {
+; CHECK-LABEL: lrint_i32_f64_simd_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx d0, d0
+; CHECK-NEXT:    fcvtzs s0, d0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.lrint.i32.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  %bc  = bitcast i32 %val to float
+  ret float %bc
+}
+
+define float @lrint_i32_f32_simd_exp(float %x)  {
+; CHECK-LABEL: lrint_i32_f32_simd_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx s0, s0
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.lrint.i32.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  %bc  = bitcast i32 %val to float
+  ret float %bc
+}
+
+define double @lrint_i64_f64_simd_exp(double %x)  {
+; CHECK-LABEL: lrint_i64_f64_simd_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx d0, d0
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.lrint.i64.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+
+define double @llrint_i64_f16_simd_exp(half %x)  {
+; CHECK-LABEL: llrint_i64_f16_simd_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx h0, h0
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.llrint.i64.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  %sum = bitcast i64 %val to double
+  ret double %sum
+}
+
+define double @llrint_i64_f32_simd_exp(float %x)  {
+; CHECK-LABEL: llrint_i64_f32_simd_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx s0, s0
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.llrint.i64.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+
+define double @llrint_i64_f64_simd_exp(double %x)  {
+; CHECK-LABEL: llrint_i64_f64_simd_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx d0, d0
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.llrint.i64.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  %bc  = bitcast i64 %val to double
+  ret double %bc
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-GI: {{.*}}
+; CHECK-SD: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
index 367105f783817..649071900eb0c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -978,12 +978,18 @@ define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
 }
 
 define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
-; CHECK-LABEL: test_bitcastv8i8tov1f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg v0.8b, v0.8b
-; CHECK-NEXT:    fcvtzs x8, d0
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_bitcastv8i8tov1f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    neg v0.8b, v0.8b
+; CHECK-SD-NEXT:    fcvtzs x8, d0
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_bitcastv8i8tov1f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    neg v0.8b, v0.8b
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ret
   %sub.i = sub <8 x i8> zeroinitializer, %a
   %1 = bitcast <8 x i8> %sub.i to <1 x double>
   %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
@@ -991,12 +997,18 @@ define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
 }
 
 define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
-; CHECK-LABEL: test_bitcastv4i16tov1f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg v0.4h, v0.4h
-; CHECK-NEXT:    fcvtzs x8, d0
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_bitcastv4i16tov1f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    neg v0.4h, v0.4h
+; CHECK-SD-NEXT:    fcvtzs x8, d0
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_bitcastv4i16tov1f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    neg v0.4h, v0.4h
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ret
   %sub.i = sub <4 x i16> zeroinitializer, %a
   %1 = bitcast <4 x i16> %sub.i to <1 x double>
   %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
@@ -1004,12 +1016,18 @@ define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
 }
 
 define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
-; CHECK-LABEL: test_bitcastv2i32tov1f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg v0.2s, v0.2s
-; CHECK-NEXT:    fcvtzs x8, d0
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_bitcastv2i32tov1f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    neg v0.2s, v0.2s
+; CHECK-SD-NEXT:    fcvtzs x8, d0
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_bitcastv2i32tov1f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    neg v0.2s, v0.2s
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ret
   %sub.i = sub <2 x i32> zeroinitializer, %a
   %1 = bitcast <2 x i32> %sub.i to <1 x double>
   %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
@@ -1029,8 +1047,7 @@ define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
 ; CHECK-GI-NEXT:    fmov x8, d0
 ; CHECK-GI-NEXT:    neg x8, x8
 ; CHECK-GI-NEXT:    fmov d0, x8
-; CHECK-GI-NEXT:    fcvtzs x8, d0
-; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    fcvtzs d0, d0
 ; CHECK-GI-NEXT:    ret
   %sub.i = sub <1 x i64> zeroinitializer, %a
   %1 = bitcast <1 x i64> %sub.i to <1 x double>
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
index be21776e26f8e..c3a95a45b7ba6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
 ; RUN: llc < %s -mtriple=arm64-eabi -pass-remarks-missed=gisel-* \
 ; RUN: -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | \
-; RUN: FileCheck %s --check-prefixes=FALLBACK,CHECK
+; RUN: FileCheck %s --check-prefixes=FALLBACK,CHECK,CHECK-GI
 
 ; FALLBACK-NOT: remark{{.*}}fcvtas_2s
 define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind {
@@ -365,9 +365,12 @@ define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind {
 define <1 x i64> @fcvtzs_1d(<1 x double> %A) nounwind {
 ;CHECK-LABEL: fcvtzs_1d:
 ;CHECK-NOT: ld1
-;CHECK: fcvtzs x8, d0
-;CHECK-NEXT: mov d0, x8
-;CHECK-NEXT: ret
+;CHECK-SD: fcvtzs x8, d0
+;CHECK-SD-NEXT: mov d0, x8
+;CHECK-SD-NEXT: ret
+
+;CHECK-GI: fcvtzs d0, d0
+;CHECK-GI-NEXT: ret
 	%tmp3 = fptosi <1 x double> %A to <1 x i64>
 	ret <1 x i64> %tmp3
 }
@@ -444,9 +447,12 @@ define <2 x i64> @fcvtzu_2d(<2 x double> %A) nounwind {
 define <1 x i64> @fcvtzu_1d(<1 x double> %A) nounwind {
 ;CHECK-LABEL: fcvtzu_1d:
 ;CHECK-NOT: ld1
-;CHECK: fcvtzu x8, d0
-;CHECK-NEXT: mov d0, x8
-;CHECK-NEXT: ret
+;CHECK-SD: fcvtzu x8, d0
+;CHECK-SD-NEXT: mov d0, x8
+;CHECK-SD-NEXT: ret
+
+;CHECK-GI: fcvtzu d0, d0
+;CHECK-GI-NEXT: ret
 	%tmp3 = fptoui <1 x double> %A to <1 x i64>
 	ret <1 x i64> %tmp3
 }
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index c74112937ba53..b963acd8cb2a1 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -31,8 +31,7 @@ define <1 x i32> @test_signed_v1f32_v1i32(<1 x float> %f) {
 ;
 ; CHECK-GI-LABEL: test_signed_v1f32_v1i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcvtzs w8, s0
-; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    fcvtzs s0, s0
 ; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptosi.sat.v1f32.v1i32(<1 x float> %f)
     ret <1 x i32> %x
@@ -1162,18 +1161,24 @@ declare <7 x i32> @llvm.fptosi.sat.v7f16.v7i32 (<7 x half>)
 declare <8 x i32> @llvm.fptosi.sat.v8f16.v8i32 (<8 x half>)
 
 define <1 x i32> @test_signed_v1f16_v1i32(<1 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v1f16_v1i32:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzs w8, s0
-; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_v1f16_v1i32:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvtzs w8, s0
+; CHECK-SD-CVT-NEXT:    fmov s0, w8
+; CHECK-SD-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_signed_v1f16_v1i32:
 ; CHECK-FP16:       // %bb.0:
 ; CHECK-FP16-NEXT:    fcvtzs w8, h0
 ; CHECK-FP16-NEXT:    fmov s0, w8
 ; CHECK-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v1f16_v1i32:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzs s0, s0
+; CHECK-GI-CVT-NEXT:    ret
     %x = call <1 x i32> @llvm.fptosi.sat.v1f16.v1i32(<1 x half> %f)
     ret <1 x i32> %x
 }
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index efe0a1bedbc9e..5a66b68af8e96 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -31,8 +31,7 @@ define <1 x i32> @test_unsigned_v1f32_v1i32(<1 x float> %f) {
 ;
 ; CHECK-GI-LABEL: test_unsigned_v1f32_v1i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcvtzu w8, s0
-; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    fcvtzu s0, s0
 ; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptoui.sat.v1f32.v1i32(<1 x float> %f)
     ret <1 x i32> %x
@@ -993,18 +992,24 @@ declare <7 x i32> @llvm.fptoui.sat.v7f16.v7i32 (<7 x half>)
 declare <8 x i32> @llvm.fptoui.sat.v8f16.v8i32 (<8 x half>)
 
 define <1 x i32> @test_unsigned_v1f16_v1i32(<1 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v1f16_v1i32:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzu w8, s0
-; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_v1f16_v1i32:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-SD-CVT-NEXT:    fmov s0, w8
+; CHECK-SD-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_unsigned_v1f16_v1i32:
 ; CHECK-FP16:       // %bb.0:
 ; CHECK-FP16-NEXT:    fcvtzu w8, h0
 ; CHECK-FP16-NEXT:    fmov s0, w8
 ; CHECK-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v1f16_v1i32:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzu s0, s0
+; CHECK-GI-CVT-NEXT:    ret
     %x = call <1 x i32> @llvm.fptoui.sat.v1f16.v1i32(<1 x half> %f)
     ret <1 x i32> %x
 }
diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll
index 927c6142138b3..057a927422432 100644
--- a/llvm/test/CodeGen/AArch64/vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll
@@ -995,12 +995,18 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) nounwind {
 ; CHECK-i32-NEXT:    fmov s0, w8
 ; CHECK-i32-NEXT:    ret
 ;
-; CHECK-i64-LABEL: lrint_v1f64:
-; CHECK-i64:       // %bb.0:
-; CHECK-i64-NEXT:    frintx d0, d0
-; CHECK-i64-NEXT:    fcvtzs x8, d0
-; CHECK-i64-NEXT:    fmov d0, x8
-; CHECK-i64-NEXT:    ret
+; CHECK-i64-SD-LABEL: lrint_v1f64:
+; CHECK-i64-SD:       // %bb.0:
+; CHECK-i64-SD-NEXT:    frintx d0, d0
+; CHECK-i64-SD-NEXT:    fcvtzs x8, d0
+; CHECK-i64-SD-NEXT:    fmov d0, x8
+; CHECK-i64-SD-NEXT:    ret
+;
+; CHECK-i64-GI-LABEL: lrint_v1f64:
+; CHECK-i64-GI:       // %bb.0:
+; CHECK-i64-GI-NEXT:    frintx d0, d0
+; CHECK-i64-GI-NEXT:    fcvtzs d0, d0
+; CHECK-i64-GI-NEXT:    ret
   %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x)
   ret <1 x iXLen> %a
 }

>From 81958ac1f3d7d550e62157d18d8a7dc1a5738654 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Thu, 4 Sep 2025 14:19:57 +0000
Subject: [PATCH 2/4] Update tests

---
 .../test/CodeGen/AArch64/fptosi-sat-vector.ll | 21 +++++++------------
 .../test/CodeGen/AArch64/fptoui-sat-vector.ll | 21 +++++++------------
 2 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index b963acd8cb2a1..c74112937ba53 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -31,7 +31,8 @@ define <1 x i32> @test_signed_v1f32_v1i32(<1 x float> %f) {
 ;
 ; CHECK-GI-LABEL: test_signed_v1f32_v1i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcvtzs s0, s0
+; CHECK-GI-NEXT:    fcvtzs w8, s0
+; CHECK-GI-NEXT:    fmov s0, w8
 ; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptosi.sat.v1f32.v1i32(<1 x float> %f)
     ret <1 x i32> %x
@@ -1161,24 +1162,18 @@ declare <7 x i32> @llvm.fptosi.sat.v7f16.v7i32 (<7 x half>)
 declare <8 x i32> @llvm.fptosi.sat.v8f16.v8i32 (<8 x half>)
 
 define <1 x i32> @test_signed_v1f16_v1i32(<1 x half> %f) {
-; CHECK-SD-CVT-LABEL: test_signed_v1f16_v1i32:
-; CHECK-SD-CVT:       // %bb.0:
-; CHECK-SD-CVT-NEXT:    fcvt s0, h0
-; CHECK-SD-CVT-NEXT:    fcvtzs w8, s0
-; CHECK-SD-CVT-NEXT:    fmov s0, w8
-; CHECK-SD-CVT-NEXT:    ret
+; CHECK-CVT-LABEL: test_signed_v1f16_v1i32:
+; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    fcvt s0, h0
+; CHECK-CVT-NEXT:    fcvtzs w8, s0
+; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_signed_v1f16_v1i32:
 ; CHECK-FP16:       // %bb.0:
 ; CHECK-FP16-NEXT:    fcvtzs w8, h0
 ; CHECK-FP16-NEXT:    fmov s0, w8
 ; CHECK-FP16-NEXT:    ret
-;
-; CHECK-GI-CVT-LABEL: test_signed_v1f16_v1i32:
-; CHECK-GI-CVT:       // %bb.0:
-; CHECK-GI-CVT-NEXT:    fcvt s0, h0
-; CHECK-GI-CVT-NEXT:    fcvtzs s0, s0
-; CHECK-GI-CVT-NEXT:    ret
     %x = call <1 x i32> @llvm.fptosi.sat.v1f16.v1i32(<1 x half> %f)
     ret <1 x i32> %x
 }
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index 5a66b68af8e96..efe0a1bedbc9e 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -31,7 +31,8 @@ define <1 x i32> @test_unsigned_v1f32_v1i32(<1 x float> %f) {
 ;
 ; CHECK-GI-LABEL: test_unsigned_v1f32_v1i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcvtzu s0, s0
+; CHECK-GI-NEXT:    fcvtzu w8, s0
+; CHECK-GI-NEXT:    fmov s0, w8
 ; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptoui.sat.v1f32.v1i32(<1 x float> %f)
     ret <1 x i32> %x
@@ -992,24 +993,18 @@ declare <7 x i32> @llvm.fptoui.sat.v7f16.v7i32 (<7 x half>)
 declare <8 x i32> @llvm.fptoui.sat.v8f16.v8i32 (<8 x half>)
 
 define <1 x i32> @test_unsigned_v1f16_v1i32(<1 x half> %f) {
-; CHECK-SD-CVT-LABEL: test_unsigned_v1f16_v1i32:
-; CHECK-SD-CVT:       // %bb.0:
-; CHECK-SD-CVT-NEXT:    fcvt s0, h0
-; CHECK-SD-CVT-NEXT:    fcvtzu w8, s0
-; CHECK-SD-CVT-NEXT:    fmov s0, w8
-; CHECK-SD-CVT-NEXT:    ret
+; CHECK-CVT-LABEL: test_unsigned_v1f16_v1i32:
+; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    fcvt s0, h0
+; CHECK-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_unsigned_v1f16_v1i32:
 ; CHECK-FP16:       // %bb.0:
 ; CHECK-FP16-NEXT:    fcvtzu w8, h0
 ; CHECK-FP16-NEXT:    fmov s0, w8
 ; CHECK-FP16-NEXT:    ret
-;
-; CHECK-GI-CVT-LABEL: test_unsigned_v1f16_v1i32:
-; CHECK-GI-CVT:       // %bb.0:
-; CHECK-GI-CVT-NEXT:    fcvt s0, h0
-; CHECK-GI-CVT-NEXT:    fcvtzu s0, s0
-; CHECK-GI-CVT-NEXT:    ret
     %x = call <1 x i32> @llvm.fptoui.sat.v1f16.v1i32(<1 x half> %f)
     ret <1 x i32> %x
 }

>From ad701bbfbfe8273af5269e80b8a2e0019f9a7f17 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Fri, 5 Sep 2025 14:21:23 +0000
Subject: [PATCH 3/4] Fix bitwise and error

---
 llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index c75a3c406f60d..2f607f1d8cf30 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -868,7 +868,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       break;
     TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
     TypeSize SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, TRI);
-    if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &
+    if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
         all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
                [&](const MachineInstr &UseMI) {
                  return onlyUsesFP(UseMI, MRI, TRI) ||
@@ -1172,7 +1172,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       }
       TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
       TypeSize SrcSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, TRI);
-      if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &
+      if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
           all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
                  [&](const MachineInstr &UseMI) {
                    return onlyUsesFP(UseMI, MRI, TRI) ||

>From b0ecf9b34e2b18edb229cf7cc729db4d62e376f1 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Mon, 8 Sep 2025 16:00:50 +0000
Subject: [PATCH 4/4] Fix GlobalISel for sat fptoi and regenerate tests

---
 .../AArch64/GISel/AArch64RegisterBankInfo.cpp |   12 +-
 .../CodeGen/AArch64/arm64-cvt-simd-fptoi.ll   | 1027 ++++++++++++++---
 2 files changed, 881 insertions(+), 158 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 2f607f1d8cf30..6e954a1f6611b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -847,17 +847,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     break;
   }
   case TargetOpcode::G_FPTOSI_SAT:
-  case TargetOpcode::G_FPTOUI_SAT: {
-    LLT DstType = MRI.getType(MI.getOperand(0).getReg());
-    if (DstType.isVector())
-      break;
-    if (DstType == LLT::scalar(16)) {
-      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
-      break;
-    }
-    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
-    break;
-  }
+  case TargetOpcode::G_FPTOUI_SAT:
   case TargetOpcode::G_FPTOSI:
   case TargetOpcode::G_FPTOUI:
   case TargetOpcode::G_INTRINSIC_LRINT:
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
index 936bdfc164810..4a6b1f1f1d9d2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
@@ -20,50 +20,50 @@
 ;
 
 define float @test_fptosi_f16_i32_simd(half %a)  {
-; CHECK-SD-LABEL: test_fptosi_f16_i32_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs s0, h0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: test_fptosi_f16_i32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, h0
+; CHECK-NEXT:    ret
   %r = fptosi half %a to i32
   %bc = bitcast i32 %r to float
   ret float %bc
 }
 
 define double @test_fptosi_f16_i64_simd(half %a)  {
-; CHECK-SD-LABEL: test_fptosi_f16_i64_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs d0, h0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: test_fptosi_f16_i64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
   %r = fptosi half %a to i64
   %bc = bitcast i64 %r to double
   ret double %bc
 }
 
 define float @test_fptosi_f64_i32_simd(double %a)  {
-; CHECK-SD-LABEL: test_fptosi_f64_i32_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs s0, d0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: test_fptosi_f64_i32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, d0
+; CHECK-NEXT:    ret
   %r = fptosi double %a to i32
   %bc = bitcast i32 %r to float
   ret float %bc
 }
 
 define double @test_fptosi_f32_i64_simd(float %a)  {
-; CHECK-SD-LABEL: test_fptosi_f32_i64_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs d0, s0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: test_fptosi_f32_i64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
   %r = fptosi float %a to i64
   %bc = bitcast i64 %r to double
   ret double %bc
 }
 
 define double @test_fptosi_f64_i64_simd(double %a)  {
-; CHECK-SD-LABEL: test_fptosi_f64_i64_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs d0, d0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: test_fptosi_f64_i64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
   %r = fptosi double %a to i64
   %bc = bitcast i64 %r to double
   ret double %bc
@@ -71,60 +71,60 @@ define double @test_fptosi_f64_i64_simd(double %a)  {
 
 
 define float @test_fptosi_f32_i32_simd(float %a)  {
-; CHECK-SD-LABEL: test_fptosi_f32_i32_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs s0, s0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: test_fptosi_f32_i32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
   %r = fptosi float %a to i32
   %bc = bitcast i32 %r to float
   ret float %bc
 }
 
 define float @test_fptoui_f16_i32_simd(half %a)  {
-; CHECK-SD-LABEL: test_fptoui_f16_i32_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzu s0, h0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: test_fptoui_f16_i32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, h0
+; CHECK-NEXT:    ret
   %r = fptoui half %a to i32
   %bc = bitcast i32 %r to float
   ret float %bc
 }
 
 define double @test_fptoui_f16_i64_simd(half %a)  {
-; CHECK-SD-LABEL: test_fptoui_f16_i64_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzu d0, h0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: test_fptoui_f16_i64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, h0
+; CHECK-NEXT:    ret
   %r = fptoui half %a to i64
   %bc = bitcast i64 %r to double
   ret double %bc
 }
 
 define float @test_fptoui_f64_i32_simd(double %a)  {
-; CHECK-SD-LABEL: test_fptoui_f64_i32_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzu s0, d0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: test_fptoui_f64_i32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, d0
+; CHECK-NEXT:    ret
   %r = fptoui double %a to i32
   %bc = bitcast i32 %r to float
   ret float %bc
 }
 
 define double @test_fptoui_f32_i64_simd(float %a)  {
-; CHECK-SD-LABEL: test_fptoui_f32_i64_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzu d0, s0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: test_fptoui_f32_i64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, s0
+; CHECK-NEXT:    ret
   %r = fptoui float %a to i64
   %bc = bitcast i64 %r to double
   ret double %bc
 }
 
 define double @test_fptoui_f64_i64_simd(double %a)  {
-; CHECK-SD-LABEL: test_fptoui_f64_i64_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzu d0, d0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: test_fptoui_f64_i64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, d0
+; CHECK-NEXT:    ret
   %r = fptoui double %a to i64
   %bc = bitcast i64 %r to double
   ret double %bc
@@ -132,10 +132,10 @@ define double @test_fptoui_f64_i64_simd(double %a)  {
 
 
 define float @test_fptoui_f32_i32_simd(float %a)  {
-; CHECK-SD-LABEL: test_fptoui_f32_i32_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzu s0, s0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: test_fptoui_f32_i32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, s0
+; CHECK-NEXT:    ret
   %r = fptoui float %a to i32
   %bc = bitcast i32 %r to float
   ret float %bc
@@ -147,60 +147,60 @@ define float @test_fptoui_f32_i32_simd(float %a)  {
 ;
 
 define float @fptosi_i32_f16_simd(half %x)  {
-; CHECK-SD-LABEL: fptosi_i32_f16_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs s0, h0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fptosi_i32_f16_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, h0
+; CHECK-NEXT:    ret
   %val = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict")
   %sum = bitcast i32 %val to float
   ret float %sum
 }
 
 define double @fptosi_i64_f16_simd(half %x)  {
-; CHECK-SD-LABEL: fptosi_i64_f16_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs d0, h0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fptosi_i64_f16_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
   %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict")
   %sum = bitcast i64 %val to double
   ret double %sum
 }
 
 define double @fptosi_i64_f32_simd(float %x)  {
-; CHECK-SD-LABEL: fptosi_i64_f32_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs d0, s0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fptosi_i64_f32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
   %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict")
   %bc = bitcast i64 %val to double
   ret double %bc
 }
 
 define float @fptosi_i32_f64_simd(double %x)  {
-; CHECK-SD-LABEL: fptosi_i32_f64_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs s0, d0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fptosi_i32_f64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, d0
+; CHECK-NEXT:    ret
   %val = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict")
   %bc = bitcast i32 %val to float
   ret float %bc
 }
 
 define double @fptosi_i64_f64_simd(double %x)  {
-; CHECK-SD-LABEL: fptosi_i64_f64_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs d0, d0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fptosi_i64_f64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
   %val = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict")
   %bc = bitcast i64 %val to double
   ret double %bc
 }
 
 define float @fptosi_i32_f32_simd(float %x)  {
-; CHECK-SD-LABEL: fptosi_i32_f32_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs s0, s0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fptosi_i32_f32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
   %val = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict")
   %bc = bitcast i32 %val to float
   ret float %bc
@@ -209,60 +209,60 @@ define float @fptosi_i32_f32_simd(float %x)  {
 
 
 define float @fptoui_i32_f16_simd(half %x)  {
-; CHECK-SD-LABEL: fptoui_i32_f16_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzu s0, h0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fptoui_i32_f16_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, h0
+; CHECK-NEXT:    ret
   %val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict")
   %sum = bitcast i32 %val to float
   ret float %sum
 }
 
 define double @fptoui_i64_f16_simd(half %x)  {
-; CHECK-SD-LABEL: fptoui_i64_f16_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzu d0, h0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fptoui_i64_f16_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, h0
+; CHECK-NEXT:    ret
   %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict")
   %sum = bitcast i64 %val to double
   ret double %sum
 }
 
 define double @fptoui_i64_f32_simd(float %x)  {
-; CHECK-SD-LABEL: fptoui_i64_f32_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzu d0, s0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fptoui_i64_f32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, s0
+; CHECK-NEXT:    ret
   %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict")
   %bc = bitcast i64 %val to double
   ret double %bc
 }
 
 define float @fptoui_i32_f64_simd(double %x)  {
-; CHECK-SD-LABEL: fptoui_i32_f64_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzu s0, d0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fptoui_i32_f64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, d0
+; CHECK-NEXT:    ret
   %val = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict")
   %bc = bitcast i32 %val to float
   ret float %bc
 }
 
 define double @fptoui_i64_f64_simd(double %x)  {
-; CHECK-SD-LABEL: fptoui_i64_f64_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzu d0, d0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fptoui_i64_f64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, d0
+; CHECK-NEXT:    ret
   %val = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict")
   %bc = bitcast i64 %val to double
   ret double %bc
 }
 
 define float @fptoui_i32_f32_simd(float %x)  {
-; CHECK-SD-LABEL: fptoui_i32_f32_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzu s0, s0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fptoui_i32_f32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, s0
+; CHECK-NEXT:    ret
   %val = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict")
   %bc = bitcast i32 %val to float
   ret float %bc
@@ -278,6 +278,16 @@ define double @fcvtas_ds_round_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtas d0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtas_ds_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl roundf
+; CHECK-GI-NEXT:    fcvtzs d0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @roundf(float %a) nounwind readnone
   %i = fptosi float %r to i64
   %bc = bitcast i64 %i to double
@@ -289,6 +299,16 @@ define float @fcvtas_sd_round_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtas s0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtas_sd_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl round
+; CHECK-GI-NEXT:    fcvtzs s0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @round(double %a) nounwind readnone
   %i = fptosi double %r to i32
   %bc = bitcast i32 %i to float
@@ -300,6 +320,16 @@ define float @fcvtas_ss_round_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtas s0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtas_ss_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl roundf
+; CHECK-GI-NEXT:    fcvtzs s0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @roundf(float %a) nounwind readnone
   %i = fptosi float %r to i32
   %bc = bitcast i32 %i to float
@@ -311,6 +341,16 @@ define double @fcvtas_dd_round_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtas d0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtas_dd_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl round
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @round(double %a) nounwind readnone
   %i = fptosi double %r to i64
   %bc = bitcast i64 %i to double
@@ -323,6 +363,16 @@ define double @fcvtau_ds_round_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtau d0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtau_ds_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl roundf
+; CHECK-GI-NEXT:    fcvtzu d0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @roundf(float %a) nounwind readnone
   %i = fptoui float %r to i64
   %bc = bitcast i64 %i to double
@@ -334,6 +384,16 @@ define float @fcvtau_sd_round_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtau s0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtau_sd_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl round
+; CHECK-GI-NEXT:    fcvtzu s0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @round(double %a) nounwind readnone
   %i = fptoui double %r to i32
   %bc = bitcast i32 %i to float
@@ -345,6 +405,16 @@ define float @fcvtau_ss_round_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtas s0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtau_ss_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl roundf
+; CHECK-GI-NEXT:    fcvtzs s0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @roundf(float %a) nounwind readnone
   %i = fptosi float %r to i32
   %bc = bitcast i32 %i to float
@@ -356,6 +426,16 @@ define double @fcvtau_dd_round_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtas d0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtau_dd_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl round
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @round(double %a) nounwind readnone
   %i = fptosi double %r to i64
   %bc = bitcast i64 %i to double
@@ -368,6 +448,16 @@ define double @fcvtms_ds_round_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtms d0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtms_ds_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl floorf
+; CHECK-GI-NEXT:    fcvtzs d0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @floorf(float %a) nounwind readnone
   %i = fptosi float %r to i64
   %bc = bitcast i64 %i to double
@@ -379,6 +469,16 @@ define float @fcvtms_sd_round_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtms s0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtms_sd_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl floor
+; CHECK-GI-NEXT:    fcvtzs s0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @floor(double %a) nounwind readnone
   %i = fptosi double %r to i32
   %bc = bitcast i32 %i to float
@@ -390,6 +490,16 @@ define float @fcvtms_ss_round_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtms s0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtms_ss_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl floorf
+; CHECK-GI-NEXT:    fcvtzs s0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @floorf(float %a) nounwind readnone
   %i = fptosi float %r to i32
   %bc = bitcast i32 %i to float
@@ -401,6 +511,16 @@ define double @fcvtms_dd_round_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtms d0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtms_dd_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl floor
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @floor(double %a) nounwind readnone
   %i = fptosi double %r to i64
   %bc = bitcast i64 %i to double
@@ -414,6 +534,16 @@ define double @fcvtmu_ds_round_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtmu d0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtmu_ds_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl floorf
+; CHECK-GI-NEXT:    fcvtzu d0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @floorf(float %a) nounwind readnone
   %i = fptoui float %r to i64
   %bc = bitcast i64 %i to double
@@ -425,6 +555,16 @@ define float @fcvtmu_sd_round_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtmu s0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtmu_sd_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl floor
+; CHECK-GI-NEXT:    fcvtzu s0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @floor(double %a) nounwind readnone
   %i = fptoui double %r to i32
   %bc = bitcast i32 %i to float
@@ -436,6 +576,16 @@ define float @fcvtmu_ss_round_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtms s0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtmu_ss_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl floorf
+; CHECK-GI-NEXT:    fcvtzs s0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @floorf(float %a) nounwind readnone
   %i = fptosi float %r to i32
   %bc = bitcast i32 %i to float
@@ -447,6 +597,16 @@ define double @fcvtmu_dd_round_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtms d0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtmu_dd_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl floor
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @floor(double %a) nounwind readnone
   %i = fptosi double %r to i64
   %bc = bitcast i64 %i to double
@@ -459,6 +619,16 @@ define double @fcvtps_ds_round_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtps d0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtps_ds_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl ceilf
+; CHECK-GI-NEXT:    fcvtzs d0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @ceilf(float %a) nounwind readnone
   %i = fptosi float %r to i64
   %bc = bitcast i64 %i to double
@@ -470,6 +640,16 @@ define float @fcvtps_sd_round_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtps s0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtps_sd_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl ceil
+; CHECK-GI-NEXT:    fcvtzs s0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @ceil(double %a) nounwind readnone
   %i = fptosi double %r to i32
   %bc = bitcast i32 %i to float
@@ -481,6 +661,16 @@ define float @fcvtps_ss_round_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtps s0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtps_ss_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl ceilf
+; CHECK-GI-NEXT:    fcvtzs s0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @ceilf(float %a) nounwind readnone
   %i = fptosi float %r to i32
   %bc = bitcast i32 %i to float
@@ -492,6 +682,16 @@ define double @fcvtps_dd_round_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtps d0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtps_dd_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl ceil
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @ceil(double %a) nounwind readnone
   %i = fptosi double %r to i64
   %bc = bitcast i64 %i to double
@@ -504,6 +704,16 @@ define double @fcvtpu_ds_round_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtpu d0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtpu_ds_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl ceilf
+; CHECK-GI-NEXT:    fcvtzu d0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @ceilf(float %a) nounwind readnone
   %i = fptoui float %r to i64
   %bc = bitcast i64 %i to double
@@ -515,6 +725,16 @@ define float @fcvtpu_sd_round_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtpu s0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtpu_sd_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl ceil
+; CHECK-GI-NEXT:    fcvtzu s0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @ceil(double %a) nounwind readnone
   %i = fptoui double %r to i32
   %bc = bitcast i32 %i to float
@@ -526,6 +746,16 @@ define float @fcvtpu_ss_round_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtps s0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtpu_ss_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl ceilf
+; CHECK-GI-NEXT:    fcvtzs s0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @ceilf(float %a) nounwind readnone
   %i = fptosi float %r to i32
   %bc = bitcast i32 %i to float
@@ -537,6 +767,16 @@ define double @fcvtpu_dd_round_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtps d0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtpu_dd_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl ceil
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @ceil(double %a) nounwind readnone
   %i = fptosi double %r to i64
   %bc = bitcast i64 %i to double
@@ -549,6 +789,16 @@ define double @fcvtzs_ds_round_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzs d0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_ds_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl truncf
+; CHECK-GI-NEXT:    fcvtzs d0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @truncf(float %a) nounwind readnone
   %i = fptosi float %r to i64
   %bc = bitcast i64 %i to double
@@ -560,6 +810,16 @@ define float @fcvtzs_sd_round_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzs s0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_sd_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl trunc
+; CHECK-GI-NEXT:    fcvtzs s0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @trunc(double %a) nounwind readnone
   %i = fptosi double %r to i32
   %bc = bitcast i32 %i to float
@@ -571,6 +831,16 @@ define float @fcvtzs_ss_round_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzs s0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_ss_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl truncf
+; CHECK-GI-NEXT:    fcvtzs s0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @truncf(float %a) nounwind readnone
   %i = fptosi float %r to i32
   %bc = bitcast i32 %i to float
@@ -582,6 +852,16 @@ define double @fcvtzs_dd_round_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzs d0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_dd_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl trunc
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @trunc(double %a) nounwind readnone
   %i = fptosi double %r to i64
   %bc = bitcast i64 %i to double
@@ -593,6 +873,16 @@ define double @fcvtzu_ds_round_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzu d0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_ds_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl truncf
+; CHECK-GI-NEXT:    fcvtzu d0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @truncf(float %a) nounwind readnone
   %i = fptoui float %r to i64
   %bc = bitcast i64 %i to double
@@ -604,6 +894,16 @@ define float @fcvtzu_sd_round_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzu s0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_sd_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl trunc
+; CHECK-GI-NEXT:    fcvtzu s0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @trunc(double %a) nounwind readnone
   %i = fptoui double %r to i32
   %bc = bitcast i32 %i to float
@@ -615,6 +915,16 @@ define float @fcvtzu_ss_round_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzs s0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_ss_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl truncf
+; CHECK-GI-NEXT:    fcvtzs s0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @truncf(float %a) nounwind readnone
   %i = fptosi float %r to i32
   %bc = bitcast i32 %i to float
@@ -626,6 +936,16 @@ define double @fcvtzu_dd_round_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzs d0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_dd_round_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl trunc
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @trunc(double %a) nounwind readnone
   %i = fptosi double %r to i64
   %bc = bitcast i64 %i to double
@@ -638,120 +958,120 @@ define double @fcvtzu_dd_round_simd(double %a) {
 ;
 
 define float @fcvtzs_sh_sat_simd(half %a) {
-; CHECK-SD-LABEL: fcvtzs_sh_sat_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs s0, h0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fcvtzs_sh_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, h0
+; CHECK-NEXT:    ret
   %i = call i32 @llvm.fptosi.sat.i32.f16(half %a)
   %bc = bitcast i32 %i to float
   ret float %bc
 }
 
 define double @fcvtzs_dh_sat_simd(half %a) {
-; CHECK-SD-LABEL: fcvtzs_dh_sat_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs d0, h0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fcvtzs_dh_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
   %i = call i64 @llvm.fptosi.sat.i64.f16(half %a)
   %bc = bitcast i64 %i to double
   ret double %bc
 }
 
 define double @fcvtzs_ds_sat_simd(float %a) {
-; CHECK-SD-LABEL: fcvtzs_ds_sat_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs d0, s0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fcvtzs_ds_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
   %i = call i64 @llvm.fptosi.sat.i64.f32(float %a)
   %bc = bitcast i64 %i to double
   ret double %bc
 }
 
 define float @fcvtzs_sd_sat_simd(double %a) {
-; CHECK-SD-LABEL: fcvtzs_sd_sat_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs s0, d0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fcvtzs_sd_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, d0
+; CHECK-NEXT:    ret
   %i = call i32 @llvm.fptosi.sat.i32.f64(double %a)
   %bc = bitcast i32 %i to float
   ret float %bc
 }
 
 define float @fcvtzs_ss_sat_simd(float %a) {
-; CHECK-SD-LABEL: fcvtzs_ss_sat_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs s0, s0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fcvtzs_ss_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
   %i = call i32 @llvm.fptosi.sat.i32.f32(float %a)
   %bc = bitcast i32 %i to float
   ret float %bc
 }
 
 define double @fcvtzs_dd_sat_simd(double %a) {
-; CHECK-SD-LABEL: fcvtzs_dd_sat_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs d0, d0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fcvtzs_dd_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
   %i = call i64 @llvm.fptosi.sat.i64.f64(double %a)
   %bc = bitcast i64 %i to double
   ret double %bc
 }
 
 define float @fcvtzu_sh_sat_simd(half %a) {
-; CHECK-SD-LABEL: fcvtzu_sh_sat_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzu s0, h0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fcvtzu_sh_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, h0
+; CHECK-NEXT:    ret
   %i = call i32 @llvm.fptoui.sat.i32.f16(half %a)
   %bc = bitcast i32 %i to float
   ret float %bc
 }
 
 define double @fcvtzu_dh_sat_simd(half %a) {
-; CHECK-SD-LABEL: fcvtzu_dh_sat_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzu d0, h0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fcvtzu_dh_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, h0
+; CHECK-NEXT:    ret
   %i = call i64 @llvm.fptoui.sat.i64.f16(half %a)
   %bc = bitcast i64 %i to double
   ret double %bc
 }
 
 define double @fcvtzu_ds_sat_simd(float %a) {
-; CHECK-SD-LABEL: fcvtzu_ds_sat_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzu d0, s0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fcvtzu_ds_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, s0
+; CHECK-NEXT:    ret
   %i = call i64 @llvm.fptoui.sat.i64.f32(float %a)
   %bc = bitcast i64 %i to double
   ret double %bc
 }
 
 define float @fcvtzu_sd_sat_simd(double %a) {
-; CHECK-SD-LABEL: fcvtzu_sd_sat_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzu s0, d0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fcvtzu_sd_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, d0
+; CHECK-NEXT:    ret
   %i = call i32 @llvm.fptoui.sat.i32.f64(double %a)
   %bc = bitcast i32 %i to float
   ret float %bc
 }
 
 define float @fcvtzu_ss_sat_simd(float %a) {
-; CHECK-SD-LABEL: fcvtzu_ss_sat_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs s0, s0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fcvtzu_ss_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
   %i = call i32 @llvm.fptosi.sat.i32.f32(float %a)
   %bc = bitcast i32 %i to float
   ret float %bc
 }
 
 define double @fcvtzu_dd_sat_simd(double %a) {
-; CHECK-SD-LABEL: fcvtzu_dd_sat_simd:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs d0, d0
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: fcvtzu_dd_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
   %i = call i64 @llvm.fptosi.sat.i64.f64(double %a)
   %bc = bitcast i64 %i to double
   ret double %bc
@@ -766,6 +1086,12 @@ define float @fcvtas_sh_simd(half %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtas s0, h0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtas_sh_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    frinta h0, h0
+; CHECK-GI-NEXT:    fcvtzs s0, h0
+; CHECK-GI-NEXT:    ret
   %r = call half @llvm.round.f16(half %a) nounwind readnone
   %i = call i32 @llvm.fptosi.sat.i32.f16(half %r)
   %bc = bitcast i32 %i to float
@@ -777,6 +1103,12 @@ define double @fcvtas_dh_simd(half %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtas d0, h0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtas_dh_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    frinta h0, h0
+; CHECK-GI-NEXT:    fcvtzs d0, h0
+; CHECK-GI-NEXT:    ret
   %r = call half @llvm.round.f16(half %a) nounwind readnone
   %i = call i64 @llvm.fptosi.sat.i64.f16(half %r)
   %bc = bitcast i64 %i to double
@@ -788,6 +1120,16 @@ define double @fcvtas_ds_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtas d0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtas_ds_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl roundf
+; CHECK-GI-NEXT:    fcvtzs d0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @roundf(float %a) nounwind readnone
   %i = call i64 @llvm.fptosi.sat.i64.f32(float %r)
   %bc = bitcast i64 %i to double
@@ -799,6 +1141,16 @@ define float @fcvtas_sd_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtas s0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtas_sd_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl round
+; CHECK-GI-NEXT:    fcvtzs s0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @round(double %a) nounwind readnone
   %i = call i32 @llvm.fptosi.sat.i32.f64(double %r)
   %bc = bitcast i32 %i to float
@@ -810,6 +1162,16 @@ define float @fcvtas_ss_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtas s0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtas_ss_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl roundf
+; CHECK-GI-NEXT:    fcvtzs s0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @roundf(float %a) nounwind readnone
   %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
   %bc = bitcast i32 %i to float
@@ -821,6 +1183,16 @@ define double @fcvtas_dd_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtas d0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtas_dd_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl round
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @round(double %a) nounwind readnone
   %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
   %bc = bitcast i64 %i to double
@@ -832,6 +1204,12 @@ define float @fcvtau_sh_simd(half %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtau s0, h0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtau_sh_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    frinta h0, h0
+; CHECK-GI-NEXT:    fcvtzu s0, h0
+; CHECK-GI-NEXT:    ret
   %r = call half @llvm.round.f16(half %a) nounwind readnone
   %i = call i32 @llvm.fptoui.sat.i32.f16(half %r)
   %bc = bitcast i32 %i to float
@@ -843,6 +1221,12 @@ define double @fcvtau_dh_simd(half %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtau d0, h0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtau_dh_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    frinta h0, h0
+; CHECK-GI-NEXT:    fcvtzu d0, h0
+; CHECK-GI-NEXT:    ret
   %r = call half @llvm.round.f16(half %a) nounwind readnone
   %i = call i64 @llvm.fptoui.sat.i64.f16(half %r)
   %bc = bitcast i64 %i to double
@@ -854,6 +1238,16 @@ define double @fcvtau_ds_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtau d0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtau_ds_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl roundf
+; CHECK-GI-NEXT:    fcvtzu d0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @roundf(float %a) nounwind readnone
   %i = call i64 @llvm.fptoui.sat.i64.f32(float %r)
   %bc = bitcast i64 %i to double
@@ -865,6 +1259,16 @@ define float @fcvtau_sd_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtau s0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtau_sd_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl round
+; CHECK-GI-NEXT:    fcvtzu s0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @round(double %a) nounwind readnone
   %i = call i32 @llvm.fptoui.sat.i32.f64(double %r)
   %bc = bitcast i32 %i to float
@@ -876,6 +1280,16 @@ define float @fcvtau_ss_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtas s0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtau_ss_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl roundf
+; CHECK-GI-NEXT:    fcvtzs s0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @roundf(float %a) nounwind readnone
   %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
   %bc = bitcast i32 %i to float
@@ -887,6 +1301,16 @@ define double @fcvtau_dd_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtas d0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtau_dd_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl round
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @round(double %a) nounwind readnone
   %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
   %bc = bitcast i64 %i to double
@@ -898,6 +1322,12 @@ define float @fcvtms_sh_simd(half %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtms s0, h0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtms_sh_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    frintm h0, h0
+; CHECK-GI-NEXT:    fcvtzs s0, h0
+; CHECK-GI-NEXT:    ret
   %r = call half @llvm.floor.f16(half %a) nounwind readnone
   %i = call i32 @llvm.fptosi.sat.i32.f16(half %r)
   %bc = bitcast i32 %i to float
@@ -909,6 +1339,12 @@ define double @fcvtms_dh_simd(half %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtms d0, h0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtms_dh_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    frintm h0, h0
+; CHECK-GI-NEXT:    fcvtzs d0, h0
+; CHECK-GI-NEXT:    ret
   %r = call half @llvm.floor.f16(half %a) nounwind readnone
   %i = call i64 @llvm.fptosi.sat.i64.f16(half %r)
   %bc = bitcast i64 %i to double
@@ -920,6 +1356,16 @@ define double @fcvtms_ds_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtms d0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtms_ds_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl floorf
+; CHECK-GI-NEXT:    fcvtzs d0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @floorf(float %a) nounwind readnone
   %i = call i64 @llvm.fptosi.sat.i64.f32(float %r)
   %bc = bitcast i64 %i to double
@@ -931,6 +1377,16 @@ define float @fcvtms_sd_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtms s0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtms_sd_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl floor
+; CHECK-GI-NEXT:    fcvtzs s0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @floor(double %a) nounwind readnone
   %i = call i32 @llvm.fptosi.sat.i32.f64(double %r)
   %bc = bitcast i32 %i to float
@@ -942,6 +1398,16 @@ define float @fcvtms_ss_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtms s0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtms_ss_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl floorf
+; CHECK-GI-NEXT:    fcvtzs s0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @floorf(float %a) nounwind readnone
   %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
   %bc = bitcast i32 %i to float
@@ -953,6 +1419,16 @@ define double @fcvtms_dd_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtms d0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtms_dd_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl floor
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @floor(double %a) nounwind readnone
   %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
   %bc = bitcast i64 %i to double
@@ -964,6 +1440,12 @@ define float @fcvtmu_sh_simd(half %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtmu s0, h0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtmu_sh_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    frintm h0, h0
+; CHECK-GI-NEXT:    fcvtzu s0, h0
+; CHECK-GI-NEXT:    ret
   %r = call half @llvm.floor.f16(half %a) nounwind readnone
   %i = call i32 @llvm.fptoui.sat.i32.f16(half %r)
   %bc = bitcast i32 %i to float
@@ -975,6 +1457,12 @@ define double @fcvtmu_dh_simd(half %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtmu d0, h0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtmu_dh_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    frintm h0, h0
+; CHECK-GI-NEXT:    fcvtzu d0, h0
+; CHECK-GI-NEXT:    ret
   %r = call half @llvm.floor.f16(half %a) nounwind readnone
   %i = call i64 @llvm.fptoui.sat.i64.f16(half %r)
   %bc = bitcast i64 %i to double
@@ -986,6 +1474,16 @@ define double @fcvtmu_ds_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtmu d0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtmu_ds_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl floorf
+; CHECK-GI-NEXT:    fcvtzu d0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @floorf(float %a) nounwind readnone
   %i = call i64 @llvm.fptoui.sat.i64.f32(float %r)
   %bc = bitcast i64 %i to double
@@ -997,6 +1495,16 @@ define float @fcvtmu_sd_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtmu s0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtmu_sd_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl floor
+; CHECK-GI-NEXT:    fcvtzu s0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @floor(double %a) nounwind readnone
   %i = call i32 @llvm.fptoui.sat.i32.f64(double %r)
   %bc = bitcast i32 %i to float
@@ -1008,6 +1516,16 @@ define float @fcvtmu_ss_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtms s0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtmu_ss_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl floorf
+; CHECK-GI-NEXT:    fcvtzs s0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @floorf(float %a) nounwind readnone
   %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
   %bc = bitcast i32 %i to float
@@ -1019,6 +1537,16 @@ define double @fcvtmu_dd_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtms d0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtmu_dd_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl floor
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @floor(double %a) nounwind readnone
   %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
   %bc = bitcast i64 %i to double
@@ -1030,6 +1558,12 @@ define float @fcvtps_sh_simd(half %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtps s0, h0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtps_sh_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    frintp h0, h0
+; CHECK-GI-NEXT:    fcvtzs s0, h0
+; CHECK-GI-NEXT:    ret
   %r = call half @llvm.ceil.f16(half %a) nounwind readnone
   %i = call i32 @llvm.fptosi.sat.i32.f16(half %r)
   %bc = bitcast i32 %i to float
@@ -1041,6 +1575,12 @@ define double @fcvtps_dh_simd(half %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtps d0, h0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtps_dh_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    frintp h0, h0
+; CHECK-GI-NEXT:    fcvtzs d0, h0
+; CHECK-GI-NEXT:    ret
   %r = call half @llvm.ceil.f16(half %a) nounwind readnone
   %i = call i64 @llvm.fptosi.sat.i64.f16(half %r)
   %bc = bitcast i64 %i to double
@@ -1052,6 +1592,16 @@ define double @fcvtps_ds_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtps d0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtps_ds_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl ceilf
+; CHECK-GI-NEXT:    fcvtzs d0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @ceilf(float %a) nounwind readnone
   %i = call i64 @llvm.fptosi.sat.i64.f32(float %r)
   %bc = bitcast i64 %i to double
@@ -1063,6 +1613,16 @@ define float @fcvtps_sd_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtps s0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtps_sd_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl ceil
+; CHECK-GI-NEXT:    fcvtzs s0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @ceil(double %a) nounwind readnone
   %i = call i32 @llvm.fptosi.sat.i32.f64(double %r)
   %bc = bitcast i32 %i to float
@@ -1074,6 +1634,16 @@ define float @fcvtps_ss_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtps s0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtps_ss_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl ceilf
+; CHECK-GI-NEXT:    fcvtzs s0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @ceilf(float %a) nounwind readnone
   %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
   %bc = bitcast i32 %i to float
@@ -1085,6 +1655,16 @@ define double @fcvtps_dd_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtps d0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtps_dd_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl ceil
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @ceil(double %a) nounwind readnone
   %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
   %bc = bitcast i64 %i to double
@@ -1096,6 +1676,12 @@ define float @fcvtpu_sh_simd(half %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtpu s0, h0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtpu_sh_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    frintp h0, h0
+; CHECK-GI-NEXT:    fcvtzu s0, h0
+; CHECK-GI-NEXT:    ret
   %r = call half @llvm.ceil.f16(half %a) nounwind readnone
   %i = call i32 @llvm.fptoui.sat.i32.f16(half %r)
   %bc = bitcast i32 %i to float
@@ -1107,6 +1693,12 @@ define double @fcvtpu_dh_simd(half %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtpu d0, h0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtpu_dh_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    frintp h0, h0
+; CHECK-GI-NEXT:    fcvtzu d0, h0
+; CHECK-GI-NEXT:    ret
   %r = call half @llvm.ceil.f16(half %a) nounwind readnone
   %i = call i64 @llvm.fptoui.sat.i64.f16(half %r)
   %bc = bitcast i64 %i to double
@@ -1118,6 +1710,16 @@ define double @fcvtpu_ds_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtpu d0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtpu_ds_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl ceilf
+; CHECK-GI-NEXT:    fcvtzu d0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @ceilf(float %a) nounwind readnone
   %i = call i64 @llvm.fptoui.sat.i64.f32(float %r)
   %bc = bitcast i64 %i to double
@@ -1129,6 +1731,16 @@ define float @fcvtpu_sd_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtpu s0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtpu_sd_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl ceil
+; CHECK-GI-NEXT:    fcvtzu s0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @ceil(double %a) nounwind readnone
   %i = call i32 @llvm.fptoui.sat.i32.f64(double %r)
   %bc = bitcast i32 %i to float
@@ -1140,6 +1752,16 @@ define float @fcvtpu_ss_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtps s0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtpu_ss_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl ceilf
+; CHECK-GI-NEXT:    fcvtzs s0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @ceilf(float %a) nounwind readnone
   %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
   %bc = bitcast i32 %i to float
@@ -1151,6 +1773,16 @@ define double @fcvtpu_dd_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtps d0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtpu_dd_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl ceil
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @ceil(double %a) nounwind readnone
   %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
   %bc = bitcast i64 %i to double
@@ -1162,6 +1794,12 @@ define float @fcvtzs_sh_simd(half %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzs s0, h0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_sh_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    frintz h0, h0
+; CHECK-GI-NEXT:    fcvtzs s0, h0
+; CHECK-GI-NEXT:    ret
   %r = call half @llvm.trunc.f16(half %a) nounwind readnone
   %i = call i32 @llvm.fptosi.sat.i32.f16(half %r)
   %bc = bitcast i32 %i to float
@@ -1173,6 +1811,12 @@ define double @fcvtzs_dh_simd(half %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzs d0, h0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_dh_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    frintz h0, h0
+; CHECK-GI-NEXT:    fcvtzs d0, h0
+; CHECK-GI-NEXT:    ret
   %r = call half @llvm.trunc.f16(half %a) nounwind readnone
   %i = call i64 @llvm.fptosi.sat.i64.f16(half %r)
   %bc = bitcast i64 %i to double
@@ -1184,6 +1828,16 @@ define double @fcvtzs_ds_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzs d0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_ds_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl truncf
+; CHECK-GI-NEXT:    fcvtzs d0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @truncf(float %a) nounwind readnone
   %i = call i64 @llvm.fptosi.sat.i64.f32(float %r)
   %bc = bitcast i64 %i to double
@@ -1195,6 +1849,16 @@ define float @fcvtzs_sd_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzs s0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_sd_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl trunc
+; CHECK-GI-NEXT:    fcvtzs s0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @trunc(double %a) nounwind readnone
   %i = call i32 @llvm.fptosi.sat.i32.f64(double %r)
   %bc = bitcast i32 %i to float
@@ -1206,6 +1870,16 @@ define float @fcvtzs_ss_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzs s0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_ss_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl truncf
+; CHECK-GI-NEXT:    fcvtzs s0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @truncf(float %a) nounwind readnone
   %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
   %bc = bitcast i32 %i to float
@@ -1217,6 +1891,16 @@ define double @fcvtzs_dd_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzs d0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_dd_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl trunc
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @trunc(double %a) nounwind readnone
   %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
   %bc = bitcast i64 %i to double
@@ -1228,6 +1912,12 @@ define float @fcvtzu_sh_simd(half %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzu s0, h0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_sh_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    frintz h0, h0
+; CHECK-GI-NEXT:    fcvtzu s0, h0
+; CHECK-GI-NEXT:    ret
   %r = call half @llvm.trunc.f16(half %a) nounwind readnone
   %i = call i32 @llvm.fptoui.sat.i32.f16(half %r)
   %bc = bitcast i32 %i to float
@@ -1239,6 +1929,12 @@ define double @fcvtzu_dh_simd(half %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzu d0, h0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_dh_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    frintz h0, h0
+; CHECK-GI-NEXT:    fcvtzu d0, h0
+; CHECK-GI-NEXT:    ret
   %r = call half @llvm.trunc.f16(half %a) nounwind readnone
   %i = call i64 @llvm.fptoui.sat.i64.f16(half %r)
   %bc = bitcast i64 %i to double
@@ -1250,6 +1946,16 @@ define double @fcvtzu_ds_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzu d0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_ds_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl truncf
+; CHECK-GI-NEXT:    fcvtzu d0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @truncf(float %a) nounwind readnone
   %i = call i64 @llvm.fptoui.sat.i64.f32(float %r)
   %bc = bitcast i64 %i to double
@@ -1261,6 +1967,16 @@ define float @fcvtzu_sd_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzu s0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_sd_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl trunc
+; CHECK-GI-NEXT:    fcvtzu s0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @trunc(double %a) nounwind readnone
   %i = call i32 @llvm.fptoui.sat.i32.f64(double %r)
   %bc = bitcast i32 %i to float
@@ -1272,6 +1988,16 @@ define float @fcvtzu_ss_simd(float %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzu s0, s0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_ss_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl truncf
+; CHECK-GI-NEXT:    fcvtzu s0, s0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call float @truncf(float %a) nounwind readnone
   %i = call i32 @llvm.fptoui.sat.i32.f32(float %r)
   %bc = bitcast i32 %i to float
@@ -1283,6 +2009,16 @@ define double @fcvtzu_dd_simd(double %a) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtzu d0, d0
 ; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_dd_simd:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl trunc
+; CHECK-GI-NEXT:    fcvtzu d0, d0
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %r = call double @trunc(double %a) nounwind readnone
   %i = call i64 @llvm.fptoui.sat.i64.f64(double %r)
   %bc = bitcast i64 %i to double
@@ -1301,6 +2037,3 @@ declare double @floor(double) nounwind readnone
 declare double @ceil(double) nounwind readnone
 declare double @trunc(double) nounwind readnone
 declare double @round(double) nounwind readnone
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}
-; CHECK-GI: {{.*}}



More information about the llvm-commits mailing list