[llvm] [AArch64][GlobalISel] Fix lowering of i64->f32 itofp. (PR #132703)

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 24 02:54:13 PDT 2025


https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/132703

This is a GISel equivalent of #130665, preventing a double-rounding issue in sitofp/uitofp by scalarizing i64->f32 converts. Most of the changes are made in the ActionDefinitionsBuilder for G_SITOFP/G_UITOFP. Because it is legal to convert i64->f16 itofp without double-rounding, but not a fpround f64->f16, that variant is lowered to build the two fptruncs (f64->f32, f32->f16).

>From 31ba237a9aafb6af6448c51481a6ae945535aa96 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 24 Mar 2025 09:44:00 +0000
Subject: [PATCH] [AArch64][GlobalISel] Fix lowering of i64->f32 itofp.

This is a GISel equivalent of #130665, preventing a double-rounding issue in
sitofp/uitofp by scalarizing i64->f32 converts. Most of the changes are made in
the ActionDefinitionsBuilder for G_SITOFP/G_UITOFP. Because it is legal to
convert i64->f16 itofp without double-rounding, but not a fpround f64->f16,
that variant is lowered to build the two fptruncs.
---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |   24 +
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    |   16 +-
 llvm/test/CodeGen/AArch64/itofp.ll            | 1578 ++++++++---------
 3 files changed, 736 insertions(+), 882 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index a9f80860124fb..631e872096c3d 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7620,6 +7620,18 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
     return Legalized;
   }
 
+  // i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16. We cannot
+  // just convert fpround f64->f16 without double-rounding, so we manually
+  // perform the lowering here where we know it is valid.
+  if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64) {
+    auto M1 = MIRBuilder.buildUITOFP(SrcTy, Src);
+    LLT S32Ty = SrcTy.changeElementSize(32);
+    auto M2 = MIRBuilder.buildFPTrunc(S32Ty, M1);
+    MIRBuilder.buildFPTrunc(Dst, M2);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
   if (SrcTy != LLT::scalar(64))
     return UnableToLegalize;
 
@@ -7651,6 +7663,18 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
     return Legalized;
   }
 
+  // i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16. We cannot
+  // just convert fpround f64->f16 without double-rounding, so we manually
+  // perform the lowering here where we know it is valid.
+  if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64) {
+    auto M1 = MIRBuilder.buildSITOFP(SrcTy, Src);
+    LLT S32Ty = SrcTy.changeElementSize(32);
+    auto M2 = MIRBuilder.buildFPTrunc(S32Ty, M1);
+    MIRBuilder.buildFPTrunc(Dst, M2);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
   if (SrcTy != S64)
     return UnableToLegalize;
 
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index c36b20badfc09..4ccf3e260e991 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -917,16 +917,28 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .moreElementsToNextPow2(1)
       .widenScalarOrEltToNextPow2OrMinSize(1)
       .minScalar(1, s32)
+      .lowerIf([](const LegalityQuery &Query) {
+        return Query.Types[1].isVector() &&
+               Query.Types[1].getScalarSizeInBits() == 64 &&
+               Query.Types[0].getScalarSizeInBits() == 16;
+      })
       .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
+      .scalarizeIf(
+          // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
+          [](const LegalityQuery &Query) {
+            return Query.Types[0].getScalarSizeInBits() == 32 &&
+                   Query.Types[1].getScalarSizeInBits() == 64;
+          },
+          0)
       .widenScalarIf(
-          [=](const LegalityQuery &Query) {
+          [](const LegalityQuery &Query) {
             return Query.Types[1].getScalarSizeInBits() <= 64 &&
                    Query.Types[0].getScalarSizeInBits() <
                        Query.Types[1].getScalarSizeInBits();
           },
           LegalizeMutations::changeElementSizeTo(0, 1))
       .widenScalarIf(
-          [=](const LegalityQuery &Query) {
+          [](const LegalityQuery &Query) {
             return Query.Types[0].getScalarSizeInBits() <= 64 &&
                    Query.Types[0].getScalarSizeInBits() >
                        Query.Types[1].getScalarSizeInBits();
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index 81c1a64f2d434..8bcf7c2a04ae3 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -4421,22 +4421,42 @@ entry:
 }
 
 define <2 x float> @stofp_v2i64_v2f32(<2 x i64> %a) {
-; CHECK-LABEL: stofp_v2i64_v2f32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: stofp_v2i64_v2f32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    scvtf v0.2d, v0.2d
+; CHECK-SD-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: stofp_v2i64_v2f32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov x8, v0.d[1]
+; CHECK-GI-NEXT:    fmov x9, d0
+; CHECK-GI-NEXT:    scvtf s0, x9
+; CHECK-GI-NEXT:    scvtf s1, x8
+; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
 entry:
   %c = sitofp <2 x i64> %a to <2 x float>
   ret <2 x float> %c
 }
 
 define <2 x float> @utofp_v2i64_v2f32(<2 x i64> %a) {
-; CHECK-LABEL: utofp_v2i64_v2f32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: utofp_v2i64_v2f32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-SD-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: utofp_v2i64_v2f32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov x8, v0.d[1]
+; CHECK-GI-NEXT:    fmov x9, d0
+; CHECK-GI-NEXT:    ucvtf s0, x9
+; CHECK-GI-NEXT:    ucvtf s1, x8
+; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
 entry:
   %c = uitofp <2 x i64> %a to <2 x float>
   ret <2 x float> %c
@@ -4457,16 +4477,13 @@ define <3 x float> @stofp_v3i64_v3f32(<3 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: stofp_v3i64_v3f32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT:    scvtf v2.2d, v2.2d
-; CHECK-GI-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-GI-NEXT:    fcvtn v2.2s, v2.2d
-; CHECK-GI-NEXT:    fcvtn v1.2s, v0.2d
-; CHECK-GI-NEXT:    mov v0.s[0], v1.s[0]
-; CHECK-GI-NEXT:    mov v0.s[1], v1.s[1]
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fmov x9, d1
+; CHECK-GI-NEXT:    scvtf s0, x8
+; CHECK-GI-NEXT:    scvtf s1, x9
+; CHECK-GI-NEXT:    fmov x8, d2
+; CHECK-GI-NEXT:    scvtf s2, x8
+; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -4489,16 +4506,13 @@ define <3 x float> @utofp_v3i64_v3f32(<3 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: utofp_v3i64_v3f32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT:    ucvtf v2.2d, v2.2d
-; CHECK-GI-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-GI-NEXT:    fcvtn v2.2s, v2.2d
-; CHECK-GI-NEXT:    fcvtn v1.2s, v0.2d
-; CHECK-GI-NEXT:    mov v0.s[0], v1.s[0]
-; CHECK-GI-NEXT:    mov v0.s[1], v1.s[1]
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fmov x9, d1
+; CHECK-GI-NEXT:    ucvtf s0, x8
+; CHECK-GI-NEXT:    ucvtf s1, x9
+; CHECK-GI-NEXT:    fmov x8, d2
+; CHECK-GI-NEXT:    ucvtf s2, x8
+; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -4507,26 +4521,56 @@ entry:
 }
 
 define <4 x float> @stofp_v4i64_v4f32(<4 x i64> %a) {
-; CHECK-LABEL: stofp_v4i64_v4f32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-NEXT:    scvtf v1.2d, v1.2d
-; CHECK-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: stofp_v4i64_v4f32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    scvtf v0.2d, v0.2d
+; CHECK-SD-NEXT:    scvtf v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    fcvtn2 v0.4s, v1.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: stofp_v4i64_v4f32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov x8, v0.d[1]
+; CHECK-GI-NEXT:    fmov x9, d0
+; CHECK-GI-NEXT:    scvtf s0, x9
+; CHECK-GI-NEXT:    mov x9, v1.d[1]
+; CHECK-GI-NEXT:    scvtf s2, x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    scvtf s1, x8
+; CHECK-GI-NEXT:    mov v0.s[1], v2.s[0]
+; CHECK-GI-NEXT:    scvtf s2, x9
+; CHECK-GI-NEXT:    mov v0.s[2], v1.s[0]
+; CHECK-GI-NEXT:    mov v0.s[3], v2.s[0]
+; CHECK-GI-NEXT:    ret
 entry:
   %c = sitofp <4 x i64> %a to <4 x float>
   ret <4 x float> %c
 }
 
 define <4 x float> @utofp_v4i64_v4f32(<4 x i64> %a) {
-; CHECK-LABEL: utofp_v4i64_v4f32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-NEXT:    ucvtf v1.2d, v1.2d
-; CHECK-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: utofp_v4i64_v4f32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-SD-NEXT:    ucvtf v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    fcvtn2 v0.4s, v1.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: utofp_v4i64_v4f32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov x8, v0.d[1]
+; CHECK-GI-NEXT:    fmov x9, d0
+; CHECK-GI-NEXT:    ucvtf s0, x9
+; CHECK-GI-NEXT:    mov x9, v1.d[1]
+; CHECK-GI-NEXT:    ucvtf s2, x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    ucvtf s1, x8
+; CHECK-GI-NEXT:    mov v0.s[1], v2.s[0]
+; CHECK-GI-NEXT:    ucvtf s2, x9
+; CHECK-GI-NEXT:    mov v0.s[2], v1.s[0]
+; CHECK-GI-NEXT:    mov v0.s[3], v2.s[0]
+; CHECK-GI-NEXT:    ret
 entry:
   %c = uitofp <4 x i64> %a to <4 x float>
   ret <4 x float> %c
@@ -4547,14 +4591,29 @@ define <8 x float> @stofp_v8i64_v8f32(<8 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: stofp_v8i64_v8f32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-GI-NEXT:    scvtf v2.2d, v2.2d
-; CHECK-GI-NEXT:    scvtf v4.2d, v1.2d
-; CHECK-GI-NEXT:    scvtf v3.2d, v3.2d
-; CHECK-GI-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-GI-NEXT:    fcvtn v1.2s, v2.2d
-; CHECK-GI-NEXT:    fcvtn2 v0.4s, v4.2d
-; CHECK-GI-NEXT:    fcvtn2 v1.4s, v3.2d
+; CHECK-GI-NEXT:    fmov x10, d0
+; CHECK-GI-NEXT:    mov x9, v2.d[1]
+; CHECK-GI-NEXT:    mov x8, v0.d[1]
+; CHECK-GI-NEXT:    scvtf s0, x10
+; CHECK-GI-NEXT:    fmov x10, d2
+; CHECK-GI-NEXT:    scvtf s5, x9
+; CHECK-GI-NEXT:    fmov x9, d1
+; CHECK-GI-NEXT:    scvtf s4, x8
+; CHECK-GI-NEXT:    mov x8, v1.d[1]
+; CHECK-GI-NEXT:    scvtf s2, x10
+; CHECK-GI-NEXT:    fmov x10, d3
+; CHECK-GI-NEXT:    scvtf s1, x9
+; CHECK-GI-NEXT:    mov x9, v3.d[1]
+; CHECK-GI-NEXT:    mov v0.s[1], v4.s[0]
+; CHECK-GI-NEXT:    scvtf s3, x10
+; CHECK-GI-NEXT:    scvtf s4, x8
+; CHECK-GI-NEXT:    mov v2.s[1], v5.s[0]
+; CHECK-GI-NEXT:    scvtf s5, x9
+; CHECK-GI-NEXT:    mov v0.s[2], v1.s[0]
+; CHECK-GI-NEXT:    mov v2.s[2], v3.s[0]
+; CHECK-GI-NEXT:    mov v0.s[3], v4.s[0]
+; CHECK-GI-NEXT:    mov v2.s[3], v5.s[0]
+; CHECK-GI-NEXT:    mov v1.16b, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sitofp <8 x i64> %a to <8 x float>
@@ -4576,14 +4635,29 @@ define <8 x float> @utofp_v8i64_v8f32(<8 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: utofp_v8i64_v8f32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-GI-NEXT:    ucvtf v2.2d, v2.2d
-; CHECK-GI-NEXT:    ucvtf v4.2d, v1.2d
-; CHECK-GI-NEXT:    ucvtf v3.2d, v3.2d
-; CHECK-GI-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-GI-NEXT:    fcvtn v1.2s, v2.2d
-; CHECK-GI-NEXT:    fcvtn2 v0.4s, v4.2d
-; CHECK-GI-NEXT:    fcvtn2 v1.4s, v3.2d
+; CHECK-GI-NEXT:    fmov x10, d0
+; CHECK-GI-NEXT:    mov x9, v2.d[1]
+; CHECK-GI-NEXT:    mov x8, v0.d[1]
+; CHECK-GI-NEXT:    ucvtf s0, x10
+; CHECK-GI-NEXT:    fmov x10, d2
+; CHECK-GI-NEXT:    ucvtf s5, x9
+; CHECK-GI-NEXT:    fmov x9, d1
+; CHECK-GI-NEXT:    ucvtf s4, x8
+; CHECK-GI-NEXT:    mov x8, v1.d[1]
+; CHECK-GI-NEXT:    ucvtf s2, x10
+; CHECK-GI-NEXT:    fmov x10, d3
+; CHECK-GI-NEXT:    ucvtf s1, x9
+; CHECK-GI-NEXT:    mov x9, v3.d[1]
+; CHECK-GI-NEXT:    mov v0.s[1], v4.s[0]
+; CHECK-GI-NEXT:    ucvtf s3, x10
+; CHECK-GI-NEXT:    ucvtf s4, x8
+; CHECK-GI-NEXT:    mov v2.s[1], v5.s[0]
+; CHECK-GI-NEXT:    ucvtf s5, x9
+; CHECK-GI-NEXT:    mov v0.s[2], v1.s[0]
+; CHECK-GI-NEXT:    mov v2.s[2], v3.s[0]
+; CHECK-GI-NEXT:    mov v0.s[3], v4.s[0]
+; CHECK-GI-NEXT:    mov v2.s[3], v5.s[0]
+; CHECK-GI-NEXT:    mov v1.16b, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = uitofp <8 x i64> %a to <8 x float>
@@ -4591,50 +4665,146 @@ entry:
 }
 
 define <16 x float> @stofp_v16i64_v16f32(<16 x i64> %a) {
-; CHECK-LABEL: stofp_v16i64_v16f32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-NEXT:    scvtf v2.2d, v2.2d
-; CHECK-NEXT:    scvtf v4.2d, v4.2d
-; CHECK-NEXT:    scvtf v6.2d, v6.2d
-; CHECK-NEXT:    scvtf v16.2d, v1.2d
-; CHECK-NEXT:    scvtf v17.2d, v3.2d
-; CHECK-NEXT:    scvtf v5.2d, v5.2d
-; CHECK-NEXT:    scvtf v7.2d, v7.2d
-; CHECK-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-NEXT:    fcvtn v1.2s, v2.2d
-; CHECK-NEXT:    fcvtn v2.2s, v4.2d
-; CHECK-NEXT:    fcvtn v3.2s, v6.2d
-; CHECK-NEXT:    fcvtn2 v0.4s, v16.2d
-; CHECK-NEXT:    fcvtn2 v1.4s, v17.2d
-; CHECK-NEXT:    fcvtn2 v2.4s, v5.2d
-; CHECK-NEXT:    fcvtn2 v3.4s, v7.2d
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: stofp_v16i64_v16f32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    scvtf v0.2d, v0.2d
+; CHECK-SD-NEXT:    scvtf v2.2d, v2.2d
+; CHECK-SD-NEXT:    scvtf v4.2d, v4.2d
+; CHECK-SD-NEXT:    scvtf v6.2d, v6.2d
+; CHECK-SD-NEXT:    scvtf v16.2d, v1.2d
+; CHECK-SD-NEXT:    scvtf v17.2d, v3.2d
+; CHECK-SD-NEXT:    scvtf v5.2d, v5.2d
+; CHECK-SD-NEXT:    scvtf v7.2d, v7.2d
+; CHECK-SD-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    fcvtn v1.2s, v2.2d
+; CHECK-SD-NEXT:    fcvtn v2.2s, v4.2d
+; CHECK-SD-NEXT:    fcvtn v3.2s, v6.2d
+; CHECK-SD-NEXT:    fcvtn2 v0.4s, v16.2d
+; CHECK-SD-NEXT:    fcvtn2 v1.4s, v17.2d
+; CHECK-SD-NEXT:    fcvtn2 v2.4s, v5.2d
+; CHECK-SD-NEXT:    fcvtn2 v3.4s, v7.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: stofp_v16i64_v16f32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov x9, v0.d[1]
+; CHECK-GI-NEXT:    fmov x13, d2
+; CHECK-GI-NEXT:    fmov x11, d0
+; CHECK-GI-NEXT:    mov x12, v4.d[1]
+; CHECK-GI-NEXT:    mov x8, v1.d[1]
+; CHECK-GI-NEXT:    mov x10, v2.d[1]
+; CHECK-GI-NEXT:    scvtf s0, x11
+; CHECK-GI-NEXT:    mov x11, v6.d[1]
+; CHECK-GI-NEXT:    scvtf s16, x9
+; CHECK-GI-NEXT:    fmov x9, d1
+; CHECK-GI-NEXT:    scvtf s1, x13
+; CHECK-GI-NEXT:    fmov x13, d4
+; CHECK-GI-NEXT:    scvtf s4, x12
+; CHECK-GI-NEXT:    fmov x12, d6
+; CHECK-GI-NEXT:    scvtf s17, x10
+; CHECK-GI-NEXT:    mov x10, v3.d[1]
+; CHECK-GI-NEXT:    scvtf s6, x11
+; CHECK-GI-NEXT:    fmov x11, d5
+; CHECK-GI-NEXT:    scvtf s18, x9
+; CHECK-GI-NEXT:    scvtf s2, x13
+; CHECK-GI-NEXT:    fmov x13, d3
+; CHECK-GI-NEXT:    scvtf s3, x12
+; CHECK-GI-NEXT:    mov x9, v5.d[1]
+; CHECK-GI-NEXT:    mov x12, v7.d[1]
+; CHECK-GI-NEXT:    mov v0.s[1], v16.s[0]
+; CHECK-GI-NEXT:    scvtf s5, x11
+; CHECK-GI-NEXT:    mov v1.s[1], v17.s[0]
+; CHECK-GI-NEXT:    scvtf s19, x13
+; CHECK-GI-NEXT:    fmov x13, d7
+; CHECK-GI-NEXT:    mov v2.s[1], v4.s[0]
+; CHECK-GI-NEXT:    mov v3.s[1], v6.s[0]
+; CHECK-GI-NEXT:    scvtf s4, x8
+; CHECK-GI-NEXT:    scvtf s6, x10
+; CHECK-GI-NEXT:    scvtf s16, x9
+; CHECK-GI-NEXT:    scvtf s17, x12
+; CHECK-GI-NEXT:    scvtf s7, x13
+; CHECK-GI-NEXT:    mov v0.s[2], v18.s[0]
+; CHECK-GI-NEXT:    mov v1.s[2], v19.s[0]
+; CHECK-GI-NEXT:    mov v2.s[2], v5.s[0]
+; CHECK-GI-NEXT:    mov v3.s[2], v7.s[0]
+; CHECK-GI-NEXT:    mov v0.s[3], v4.s[0]
+; CHECK-GI-NEXT:    mov v1.s[3], v6.s[0]
+; CHECK-GI-NEXT:    mov v2.s[3], v16.s[0]
+; CHECK-GI-NEXT:    mov v3.s[3], v17.s[0]
+; CHECK-GI-NEXT:    ret
 entry:
   %c = sitofp <16 x i64> %a to <16 x float>
   ret <16 x float> %c
 }
 
 define <16 x float> @utofp_v16i64_v16f32(<16 x i64> %a) {
-; CHECK-LABEL: utofp_v16i64_v16f32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-NEXT:    ucvtf v2.2d, v2.2d
-; CHECK-NEXT:    ucvtf v4.2d, v4.2d
-; CHECK-NEXT:    ucvtf v6.2d, v6.2d
-; CHECK-NEXT:    ucvtf v16.2d, v1.2d
-; CHECK-NEXT:    ucvtf v17.2d, v3.2d
-; CHECK-NEXT:    ucvtf v5.2d, v5.2d
-; CHECK-NEXT:    ucvtf v7.2d, v7.2d
-; CHECK-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-NEXT:    fcvtn v1.2s, v2.2d
-; CHECK-NEXT:    fcvtn v2.2s, v4.2d
-; CHECK-NEXT:    fcvtn v3.2s, v6.2d
-; CHECK-NEXT:    fcvtn2 v0.4s, v16.2d
-; CHECK-NEXT:    fcvtn2 v1.4s, v17.2d
-; CHECK-NEXT:    fcvtn2 v2.4s, v5.2d
-; CHECK-NEXT:    fcvtn2 v3.4s, v7.2d
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: utofp_v16i64_v16f32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-SD-NEXT:    ucvtf v2.2d, v2.2d
+; CHECK-SD-NEXT:    ucvtf v4.2d, v4.2d
+; CHECK-SD-NEXT:    ucvtf v6.2d, v6.2d
+; CHECK-SD-NEXT:    ucvtf v16.2d, v1.2d
+; CHECK-SD-NEXT:    ucvtf v17.2d, v3.2d
+; CHECK-SD-NEXT:    ucvtf v5.2d, v5.2d
+; CHECK-SD-NEXT:    ucvtf v7.2d, v7.2d
+; CHECK-SD-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    fcvtn v1.2s, v2.2d
+; CHECK-SD-NEXT:    fcvtn v2.2s, v4.2d
+; CHECK-SD-NEXT:    fcvtn v3.2s, v6.2d
+; CHECK-SD-NEXT:    fcvtn2 v0.4s, v16.2d
+; CHECK-SD-NEXT:    fcvtn2 v1.4s, v17.2d
+; CHECK-SD-NEXT:    fcvtn2 v2.4s, v5.2d
+; CHECK-SD-NEXT:    fcvtn2 v3.4s, v7.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: utofp_v16i64_v16f32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov x9, v0.d[1]
+; CHECK-GI-NEXT:    fmov x13, d2
+; CHECK-GI-NEXT:    fmov x11, d0
+; CHECK-GI-NEXT:    mov x12, v4.d[1]
+; CHECK-GI-NEXT:    mov x8, v1.d[1]
+; CHECK-GI-NEXT:    mov x10, v2.d[1]
+; CHECK-GI-NEXT:    ucvtf s0, x11
+; CHECK-GI-NEXT:    mov x11, v6.d[1]
+; CHECK-GI-NEXT:    ucvtf s16, x9
+; CHECK-GI-NEXT:    fmov x9, d1
+; CHECK-GI-NEXT:    ucvtf s1, x13
+; CHECK-GI-NEXT:    fmov x13, d4
+; CHECK-GI-NEXT:    ucvtf s4, x12
+; CHECK-GI-NEXT:    fmov x12, d6
+; CHECK-GI-NEXT:    ucvtf s17, x10
+; CHECK-GI-NEXT:    mov x10, v3.d[1]
+; CHECK-GI-NEXT:    ucvtf s6, x11
+; CHECK-GI-NEXT:    fmov x11, d5
+; CHECK-GI-NEXT:    ucvtf s18, x9
+; CHECK-GI-NEXT:    ucvtf s2, x13
+; CHECK-GI-NEXT:    fmov x13, d3
+; CHECK-GI-NEXT:    ucvtf s3, x12
+; CHECK-GI-NEXT:    mov x9, v5.d[1]
+; CHECK-GI-NEXT:    mov x12, v7.d[1]
+; CHECK-GI-NEXT:    mov v0.s[1], v16.s[0]
+; CHECK-GI-NEXT:    ucvtf s5, x11
+; CHECK-GI-NEXT:    mov v1.s[1], v17.s[0]
+; CHECK-GI-NEXT:    ucvtf s19, x13
+; CHECK-GI-NEXT:    fmov x13, d7
+; CHECK-GI-NEXT:    mov v2.s[1], v4.s[0]
+; CHECK-GI-NEXT:    mov v3.s[1], v6.s[0]
+; CHECK-GI-NEXT:    ucvtf s4, x8
+; CHECK-GI-NEXT:    ucvtf s6, x10
+; CHECK-GI-NEXT:    ucvtf s16, x9
+; CHECK-GI-NEXT:    ucvtf s17, x12
+; CHECK-GI-NEXT:    ucvtf s7, x13
+; CHECK-GI-NEXT:    mov v0.s[2], v18.s[0]
+; CHECK-GI-NEXT:    mov v1.s[2], v19.s[0]
+; CHECK-GI-NEXT:    mov v2.s[2], v5.s[0]
+; CHECK-GI-NEXT:    mov v3.s[2], v7.s[0]
+; CHECK-GI-NEXT:    mov v0.s[3], v4.s[0]
+; CHECK-GI-NEXT:    mov v1.s[3], v6.s[0]
+; CHECK-GI-NEXT:    mov v2.s[3], v16.s[0]
+; CHECK-GI-NEXT:    mov v3.s[3], v17.s[0]
+; CHECK-GI-NEXT:    ret
 entry:
   %c = uitofp <16 x i64> %a to <16 x float>
   ret <16 x float> %c
@@ -4683,42 +4853,110 @@ define <32 x float> @stofp_v32i64_v32f32(<32 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: stofp_v32i64_v32f32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ldp q16, q17, [sp]
-; CHECK-GI-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-GI-NEXT:    ldp q18, q19, [sp, #32]
-; CHECK-GI-NEXT:    scvtf v24.2d, v1.2d
-; CHECK-GI-NEXT:    ldp q20, q21, [sp, #64]
-; CHECK-GI-NEXT:    scvtf v1.2d, v2.2d
-; CHECK-GI-NEXT:    ldp q22, q23, [sp, #96]
-; CHECK-GI-NEXT:    scvtf v25.2d, v3.2d
-; CHECK-GI-NEXT:    scvtf v2.2d, v4.2d
-; CHECK-GI-NEXT:    scvtf v26.2d, v5.2d
-; CHECK-GI-NEXT:    scvtf v3.2d, v6.2d
-; CHECK-GI-NEXT:    scvtf v27.2d, v7.2d
-; CHECK-GI-NEXT:    scvtf v4.2d, v16.2d
-; CHECK-GI-NEXT:    scvtf v5.2d, v18.2d
-; CHECK-GI-NEXT:    scvtf v6.2d, v20.2d
-; CHECK-GI-NEXT:    scvtf v7.2d, v22.2d
-; CHECK-GI-NEXT:    scvtf v16.2d, v17.2d
-; CHECK-GI-NEXT:    scvtf v17.2d, v19.2d
-; CHECK-GI-NEXT:    scvtf v18.2d, v21.2d
-; CHECK-GI-NEXT:    scvtf v19.2d, v23.2d
-; CHECK-GI-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-GI-NEXT:    fcvtn v1.2s, v1.2d
-; CHECK-GI-NEXT:    fcvtn v2.2s, v2.2d
-; CHECK-GI-NEXT:    fcvtn v3.2s, v3.2d
-; CHECK-GI-NEXT:    fcvtn v4.2s, v4.2d
-; CHECK-GI-NEXT:    fcvtn v5.2s, v5.2d
-; CHECK-GI-NEXT:    fcvtn v6.2s, v6.2d
-; CHECK-GI-NEXT:    fcvtn v7.2s, v7.2d
-; CHECK-GI-NEXT:    fcvtn2 v0.4s, v24.2d
-; CHECK-GI-NEXT:    fcvtn2 v1.4s, v25.2d
-; CHECK-GI-NEXT:    fcvtn2 v2.4s, v26.2d
-; CHECK-GI-NEXT:    fcvtn2 v3.4s, v27.2d
-; CHECK-GI-NEXT:    fcvtn2 v4.4s, v16.2d
-; CHECK-GI-NEXT:    fcvtn2 v5.4s, v17.2d
-; CHECK-GI-NEXT:    fcvtn2 v6.4s, v18.2d
-; CHECK-GI-NEXT:    fcvtn2 v7.4s, v19.2d
+; CHECK-GI-NEXT:    str d12, [sp, #-48]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-GI-NEXT:    .cfi_offset b8, -8
+; CHECK-GI-NEXT:    .cfi_offset b9, -16
+; CHECK-GI-NEXT:    .cfi_offset b10, -24
+; CHECK-GI-NEXT:    .cfi_offset b11, -32
+; CHECK-GI-NEXT:    .cfi_offset b12, -48
+; CHECK-GI-NEXT:    mov x8, v0.d[1]
+; CHECK-GI-NEXT:    mov x10, v1.d[1]
+; CHECK-GI-NEXT:    mov x12, v4.d[1]
+; CHECK-GI-NEXT:    mov x11, v3.d[1]
+; CHECK-GI-NEXT:    ldp q30, q25, [sp, #112]
+; CHECK-GI-NEXT:    fmov x9, d0
+; CHECK-GI-NEXT:    ldp q21, q24, [sp, #48]
+; CHECK-GI-NEXT:    ldp q11, q27, [sp, #80]
+; CHECK-GI-NEXT:    scvtf s28, x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    scvtf s16, x10
+; CHECK-GI-NEXT:    mov x10, v6.d[1]
+; CHECK-GI-NEXT:    scvtf s9, x12
+; CHECK-GI-NEXT:    fmov x12, d6
+; CHECK-GI-NEXT:    scvtf s0, x9
+; CHECK-GI-NEXT:    mov x9, v2.d[1]
+; CHECK-GI-NEXT:    scvtf s18, x11
+; CHECK-GI-NEXT:    scvtf s17, x8
+; CHECK-GI-NEXT:    fmov x8, d2
+; CHECK-GI-NEXT:    fmov x11, d5
+; CHECK-GI-NEXT:    ldp q31, q26, [sp, #144]
+; CHECK-GI-NEXT:    mov x13, v21.d[1]
+; CHECK-GI-NEXT:    scvtf s10, x10
+; CHECK-GI-NEXT:    fmov x10, d21
+; CHECK-GI-NEXT:    fmov x14, d30
+; CHECK-GI-NEXT:    scvtf s1, x8
+; CHECK-GI-NEXT:    fmov x8, d3
+; CHECK-GI-NEXT:    scvtf s3, x12
+; CHECK-GI-NEXT:    mov x12, v30.d[1]
+; CHECK-GI-NEXT:    scvtf s22, x11
+; CHECK-GI-NEXT:    fmov x11, d11
+; CHECK-GI-NEXT:    scvtf s29, x9
+; CHECK-GI-NEXT:    fmov x9, d4
+; CHECK-GI-NEXT:    scvtf s4, x10
+; CHECK-GI-NEXT:    scvtf s19, x8
+; CHECK-GI-NEXT:    mov x8, v5.d[1]
+; CHECK-GI-NEXT:    mov x10, v11.d[1]
+; CHECK-GI-NEXT:    scvtf s5, x11
+; CHECK-GI-NEXT:    mov x11, v31.d[1]
+; CHECK-GI-NEXT:    scvtf s8, x13
+; CHECK-GI-NEXT:    scvtf s11, x12
+; CHECK-GI-NEXT:    fmov x12, d31
+; CHECK-GI-NEXT:    scvtf s2, x9
+; CHECK-GI-NEXT:    mov x9, v7.d[1]
+; CHECK-GI-NEXT:    scvtf s6, x14
+; CHECK-GI-NEXT:    fmov x13, d27
+; CHECK-GI-NEXT:    scvtf s20, x8
+; CHECK-GI-NEXT:    fmov x8, d7
+; CHECK-GI-NEXT:    scvtf s12, x10
+; CHECK-GI-NEXT:    scvtf s7, x12
+; CHECK-GI-NEXT:    fmov x12, d25
+; CHECK-GI-NEXT:    scvtf s30, x11
+; CHECK-GI-NEXT:    mov x10, v25.d[1]
+; CHECK-GI-NEXT:    mov x11, v26.d[1]
+; CHECK-GI-NEXT:    mov v0.s[1], v28.s[0]
+; CHECK-GI-NEXT:    scvtf s21, x9
+; CHECK-GI-NEXT:    fmov x9, d24
+; CHECK-GI-NEXT:    scvtf s23, x8
+; CHECK-GI-NEXT:    scvtf s25, x12
+; CHECK-GI-NEXT:    fmov x12, d26
+; CHECK-GI-NEXT:    mov x8, v24.d[1]
+; CHECK-GI-NEXT:    mov v1.s[1], v29.s[0]
+; CHECK-GI-NEXT:    mov v2.s[1], v9.s[0]
+; CHECK-GI-NEXT:    mov v3.s[1], v10.s[0]
+; CHECK-GI-NEXT:    scvtf s24, x9
+; CHECK-GI-NEXT:    mov x9, v27.d[1]
+; CHECK-GI-NEXT:    scvtf s27, x13
+; CHECK-GI-NEXT:    scvtf s26, x12
+; CHECK-GI-NEXT:    mov v4.s[1], v8.s[0]
+; CHECK-GI-NEXT:    mov v5.s[1], v12.s[0]
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v6.s[1], v11.s[0]
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v7.s[1], v30.s[0]
+; CHECK-GI-NEXT:    scvtf s28, x8
+; CHECK-GI-NEXT:    scvtf s29, x9
+; CHECK-GI-NEXT:    scvtf s30, x10
+; CHECK-GI-NEXT:    scvtf s31, x11
+; CHECK-GI-NEXT:    mov v0.s[2], v17.s[0]
+; CHECK-GI-NEXT:    mov v1.s[2], v19.s[0]
+; CHECK-GI-NEXT:    mov v2.s[2], v22.s[0]
+; CHECK-GI-NEXT:    mov v3.s[2], v23.s[0]
+; CHECK-GI-NEXT:    mov v4.s[2], v24.s[0]
+; CHECK-GI-NEXT:    mov v5.s[2], v27.s[0]
+; CHECK-GI-NEXT:    mov v6.s[2], v25.s[0]
+; CHECK-GI-NEXT:    mov v7.s[2], v26.s[0]
+; CHECK-GI-NEXT:    mov v0.s[3], v16.s[0]
+; CHECK-GI-NEXT:    mov v1.s[3], v18.s[0]
+; CHECK-GI-NEXT:    mov v2.s[3], v20.s[0]
+; CHECK-GI-NEXT:    mov v3.s[3], v21.s[0]
+; CHECK-GI-NEXT:    mov v4.s[3], v28.s[0]
+; CHECK-GI-NEXT:    mov v5.s[3], v29.s[0]
+; CHECK-GI-NEXT:    mov v6.s[3], v30.s[0]
+; CHECK-GI-NEXT:    mov v7.s[3], v31.s[0]
+; CHECK-GI-NEXT:    ldr d12, [sp], #48 // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sitofp <32 x i64> %a to <32 x float>
@@ -4768,42 +5006,110 @@ define <32 x float> @utofp_v32i64_v32f32(<32 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: utofp_v32i64_v32f32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ldp q16, q17, [sp]
-; CHECK-GI-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-GI-NEXT:    ldp q18, q19, [sp, #32]
-; CHECK-GI-NEXT:    ucvtf v24.2d, v1.2d
-; CHECK-GI-NEXT:    ldp q20, q21, [sp, #64]
-; CHECK-GI-NEXT:    ucvtf v1.2d, v2.2d
-; CHECK-GI-NEXT:    ldp q22, q23, [sp, #96]
-; CHECK-GI-NEXT:    ucvtf v25.2d, v3.2d
-; CHECK-GI-NEXT:    ucvtf v2.2d, v4.2d
-; CHECK-GI-NEXT:    ucvtf v26.2d, v5.2d
-; CHECK-GI-NEXT:    ucvtf v3.2d, v6.2d
-; CHECK-GI-NEXT:    ucvtf v27.2d, v7.2d
-; CHECK-GI-NEXT:    ucvtf v4.2d, v16.2d
-; CHECK-GI-NEXT:    ucvtf v5.2d, v18.2d
-; CHECK-GI-NEXT:    ucvtf v6.2d, v20.2d
-; CHECK-GI-NEXT:    ucvtf v7.2d, v22.2d
-; CHECK-GI-NEXT:    ucvtf v16.2d, v17.2d
-; CHECK-GI-NEXT:    ucvtf v17.2d, v19.2d
-; CHECK-GI-NEXT:    ucvtf v18.2d, v21.2d
-; CHECK-GI-NEXT:    ucvtf v19.2d, v23.2d
-; CHECK-GI-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-GI-NEXT:    fcvtn v1.2s, v1.2d
-; CHECK-GI-NEXT:    fcvtn v2.2s, v2.2d
-; CHECK-GI-NEXT:    fcvtn v3.2s, v3.2d
-; CHECK-GI-NEXT:    fcvtn v4.2s, v4.2d
-; CHECK-GI-NEXT:    fcvtn v5.2s, v5.2d
-; CHECK-GI-NEXT:    fcvtn v6.2s, v6.2d
-; CHECK-GI-NEXT:    fcvtn v7.2s, v7.2d
-; CHECK-GI-NEXT:    fcvtn2 v0.4s, v24.2d
-; CHECK-GI-NEXT:    fcvtn2 v1.4s, v25.2d
-; CHECK-GI-NEXT:    fcvtn2 v2.4s, v26.2d
-; CHECK-GI-NEXT:    fcvtn2 v3.4s, v27.2d
-; CHECK-GI-NEXT:    fcvtn2 v4.4s, v16.2d
-; CHECK-GI-NEXT:    fcvtn2 v5.4s, v17.2d
-; CHECK-GI-NEXT:    fcvtn2 v6.4s, v18.2d
-; CHECK-GI-NEXT:    fcvtn2 v7.4s, v19.2d
+; CHECK-GI-NEXT:    str d12, [sp, #-48]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-GI-NEXT:    .cfi_offset b8, -8
+; CHECK-GI-NEXT:    .cfi_offset b9, -16
+; CHECK-GI-NEXT:    .cfi_offset b10, -24
+; CHECK-GI-NEXT:    .cfi_offset b11, -32
+; CHECK-GI-NEXT:    .cfi_offset b12, -48
+; CHECK-GI-NEXT:    mov x8, v0.d[1]
+; CHECK-GI-NEXT:    mov x10, v1.d[1]
+; CHECK-GI-NEXT:    mov x12, v4.d[1]
+; CHECK-GI-NEXT:    mov x11, v3.d[1]
+; CHECK-GI-NEXT:    ldp q30, q25, [sp, #112]
+; CHECK-GI-NEXT:    fmov x9, d0
+; CHECK-GI-NEXT:    ldp q21, q24, [sp, #48]
+; CHECK-GI-NEXT:    ldp q11, q27, [sp, #80]
+; CHECK-GI-NEXT:    ucvtf s28, x8
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    ucvtf s16, x10
+; CHECK-GI-NEXT:    mov x10, v6.d[1]
+; CHECK-GI-NEXT:    ucvtf s9, x12
+; CHECK-GI-NEXT:    fmov x12, d6
+; CHECK-GI-NEXT:    ucvtf s0, x9
+; CHECK-GI-NEXT:    mov x9, v2.d[1]
+; CHECK-GI-NEXT:    ucvtf s18, x11
+; CHECK-GI-NEXT:    ucvtf s17, x8
+; CHECK-GI-NEXT:    fmov x8, d2
+; CHECK-GI-NEXT:    fmov x11, d5
+; CHECK-GI-NEXT:    ldp q31, q26, [sp, #144]
+; CHECK-GI-NEXT:    mov x13, v21.d[1]
+; CHECK-GI-NEXT:    ucvtf s10, x10
+; CHECK-GI-NEXT:    fmov x10, d21
+; CHECK-GI-NEXT:    fmov x14, d30
+; CHECK-GI-NEXT:    ucvtf s1, x8
+; CHECK-GI-NEXT:    fmov x8, d3
+; CHECK-GI-NEXT:    ucvtf s3, x12
+; CHECK-GI-NEXT:    mov x12, v30.d[1]
+; CHECK-GI-NEXT:    ucvtf s22, x11
+; CHECK-GI-NEXT:    fmov x11, d11
+; CHECK-GI-NEXT:    ucvtf s29, x9
+; CHECK-GI-NEXT:    fmov x9, d4
+; CHECK-GI-NEXT:    ucvtf s4, x10
+; CHECK-GI-NEXT:    ucvtf s19, x8
+; CHECK-GI-NEXT:    mov x8, v5.d[1]
+; CHECK-GI-NEXT:    mov x10, v11.d[1]
+; CHECK-GI-NEXT:    ucvtf s5, x11
+; CHECK-GI-NEXT:    mov x11, v31.d[1]
+; CHECK-GI-NEXT:    ucvtf s8, x13
+; CHECK-GI-NEXT:    ucvtf s11, x12
+; CHECK-GI-NEXT:    fmov x12, d31
+; CHECK-GI-NEXT:    ucvtf s2, x9
+; CHECK-GI-NEXT:    mov x9, v7.d[1]
+; CHECK-GI-NEXT:    ucvtf s6, x14
+; CHECK-GI-NEXT:    fmov x13, d27
+; CHECK-GI-NEXT:    ucvtf s20, x8
+; CHECK-GI-NEXT:    fmov x8, d7
+; CHECK-GI-NEXT:    ucvtf s12, x10
+; CHECK-GI-NEXT:    ucvtf s7, x12
+; CHECK-GI-NEXT:    fmov x12, d25
+; CHECK-GI-NEXT:    ucvtf s30, x11
+; CHECK-GI-NEXT:    mov x10, v25.d[1]
+; CHECK-GI-NEXT:    mov x11, v26.d[1]
+; CHECK-GI-NEXT:    mov v0.s[1], v28.s[0]
+; CHECK-GI-NEXT:    ucvtf s21, x9
+; CHECK-GI-NEXT:    fmov x9, d24
+; CHECK-GI-NEXT:    ucvtf s23, x8
+; CHECK-GI-NEXT:    ucvtf s25, x12
+; CHECK-GI-NEXT:    fmov x12, d26
+; CHECK-GI-NEXT:    mov x8, v24.d[1]
+; CHECK-GI-NEXT:    mov v1.s[1], v29.s[0]
+; CHECK-GI-NEXT:    mov v2.s[1], v9.s[0]
+; CHECK-GI-NEXT:    mov v3.s[1], v10.s[0]
+; CHECK-GI-NEXT:    ucvtf s24, x9
+; CHECK-GI-NEXT:    mov x9, v27.d[1]
+; CHECK-GI-NEXT:    ucvtf s27, x13
+; CHECK-GI-NEXT:    ucvtf s26, x12
+; CHECK-GI-NEXT:    mov v4.s[1], v8.s[0]
+; CHECK-GI-NEXT:    mov v5.s[1], v12.s[0]
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v6.s[1], v11.s[0]
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v7.s[1], v30.s[0]
+; CHECK-GI-NEXT:    ucvtf s28, x8
+; CHECK-GI-NEXT:    ucvtf s29, x9
+; CHECK-GI-NEXT:    ucvtf s30, x10
+; CHECK-GI-NEXT:    ucvtf s31, x11
+; CHECK-GI-NEXT:    mov v0.s[2], v17.s[0]
+; CHECK-GI-NEXT:    mov v1.s[2], v19.s[0]
+; CHECK-GI-NEXT:    mov v2.s[2], v22.s[0]
+; CHECK-GI-NEXT:    mov v3.s[2], v23.s[0]
+; CHECK-GI-NEXT:    mov v4.s[2], v24.s[0]
+; CHECK-GI-NEXT:    mov v5.s[2], v27.s[0]
+; CHECK-GI-NEXT:    mov v6.s[2], v25.s[0]
+; CHECK-GI-NEXT:    mov v7.s[2], v26.s[0]
+; CHECK-GI-NEXT:    mov v0.s[3], v16.s[0]
+; CHECK-GI-NEXT:    mov v1.s[3], v18.s[0]
+; CHECK-GI-NEXT:    mov v2.s[3], v20.s[0]
+; CHECK-GI-NEXT:    mov v3.s[3], v21.s[0]
+; CHECK-GI-NEXT:    mov v4.s[3], v28.s[0]
+; CHECK-GI-NEXT:    mov v5.s[3], v29.s[0]
+; CHECK-GI-NEXT:    mov v6.s[3], v30.s[0]
+; CHECK-GI-NEXT:    mov v7.s[3], v31.s[0]
+; CHECK-GI-NEXT:    ldr d12, [sp], #48 // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = uitofp <32 x i64> %a to <32 x float>
@@ -6195,24 +6501,14 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) {
 ; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NOFP16-LABEL: stofp_v2i64_v2f16:
-; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-GI-NOFP16-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-GI-NOFP16-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT:    ret
-;
-; CHECK-GI-FP16-LABEL: stofp_v2i64_v2f16:
-; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h0, d0
-; CHECK-GI-FP16-NEXT:    fcvt h1, d1
-; CHECK-GI-FP16-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-FP16-NEXT:    ret
+; CHECK-GI-LABEL: stofp_v2i64_v2f16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    scvtf v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    mov v1.s[0], v0.s[0]
+; CHECK-GI-NEXT:    mov v1.s[1], v0.s[1]
+; CHECK-GI-NEXT:    fcvtn v0.4h, v1.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = sitofp <2 x i64> %a to <2 x half>
   ret <2 x half> %c
@@ -6241,198 +6537,78 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) {
 ; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NOFP16-LABEL: utofp_v2i64_v2f16:
-; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-GI-NOFP16-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-GI-NOFP16-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT:    ret
-;
-; CHECK-GI-FP16-LABEL: utofp_v2i64_v2f16:
-; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h0, d0
-; CHECK-GI-FP16-NEXT:    fcvt h1, d1
-; CHECK-GI-FP16-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-FP16-NEXT:    ret
+; CHECK-GI-LABEL: utofp_v2i64_v2f16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    mov v1.s[0], v0.s[0]
+; CHECK-GI-NEXT:    mov v1.s[1], v0.s[1]
+; CHECK-GI-NEXT:    fcvtn v0.4h, v1.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = uitofp <2 x i64> %a to <2 x half>
   ret <2 x half> %c
 }
 
 define <3 x half> @stofp_v3i64_v3f16(<3 x i64> %a) {
-; CHECK-SD-LABEL: stofp_v3i64_v3f16:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT:    scvtf v1.2d, v2.2d
-; CHECK-SD-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-SD-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-SD-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-SD-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-NOFP16-LABEL: stofp_v3i64_v3f16:
-; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NOFP16-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-GI-NOFP16-NEXT:    scvtf v1.2d, v2.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT:    ret
-;
-; CHECK-GI-FP16-LABEL: stofp_v3i64_v3f16:
-; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-FP16-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-FP16-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-GI-FP16-NEXT:    scvtf v2.2d, v2.2d
-; CHECK-GI-FP16-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT:    fcvt h2, d2
-; CHECK-GI-FP16-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h0, d0
-; CHECK-GI-FP16-NEXT:    fcvt h1, d1
-; CHECK-GI-FP16-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT:    mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-FP16-NEXT:    ret
+; CHECK-LABEL: stofp_v3i64_v3f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    scvtf v1.2d, v2.2d
+; CHECK-NEXT:    scvtf v0.2d, v0.2d
+; CHECK-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-NEXT:    fcvtn2 v0.4s, v1.2d
+; CHECK-NEXT:    fcvtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
 entry:
   %c = sitofp <3 x i64> %a to <3 x half>
   ret <3 x half> %c
 }
 
 define <3 x half> @utofp_v3i64_v3f16(<3 x i64> %a) {
-; CHECK-SD-LABEL: utofp_v3i64_v3f16:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT:    ucvtf v1.2d, v2.2d
-; CHECK-SD-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-SD-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-SD-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-SD-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-NOFP16-LABEL: utofp_v3i64_v3f16:
-; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NOFP16-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-GI-NOFP16-NEXT:    ucvtf v1.2d, v2.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT:    ret
-;
-; CHECK-GI-FP16-LABEL: utofp_v3i64_v3f16:
-; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-FP16-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-FP16-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-GI-FP16-NEXT:    ucvtf v2.2d, v2.2d
-; CHECK-GI-FP16-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT:    fcvt h2, d2
-; CHECK-GI-FP16-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h0, d0
-; CHECK-GI-FP16-NEXT:    fcvt h1, d1
-; CHECK-GI-FP16-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT:    mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-FP16-NEXT:    ret
+; CHECK-LABEL: utofp_v3i64_v3f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ucvtf v1.2d, v2.2d
+; CHECK-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-NEXT:    fcvtn2 v0.4s, v1.2d
+; CHECK-NEXT:    fcvtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
 entry:
   %c = uitofp <3 x i64> %a to <3 x half>
   ret <3 x half> %c
 }
 
 define <4 x half> @stofp_v4i64_v4f16(<4 x i64> %a) {
-; CHECK-SD-LABEL: stofp_v4i64_v4f16:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-SD-NEXT:    scvtf v1.2d, v1.2d
-; CHECK-SD-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-SD-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-SD-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-NOFP16-LABEL: stofp_v4i64_v4f16:
-; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v1.2d, v1.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT:    ret
-;
-; CHECK-GI-FP16-LABEL: stofp_v4i64_v4f16:
-; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT:    scvtf v1.2d, v1.2d
-; CHECK-GI-FP16-NEXT:    mov d2, v0.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h0, d0
-; CHECK-GI-FP16-NEXT:    mov d3, v1.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h1, d1
-; CHECK-GI-FP16-NEXT:    fcvt h2, d2
-; CHECK-GI-FP16-NEXT:    mov v0.h[1], v2.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h2, d3
-; CHECK-GI-FP16-NEXT:    mov v0.h[2], v1.h[0]
-; CHECK-GI-FP16-NEXT:    mov v0.h[3], v2.h[0]
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-FP16-NEXT:    ret
+; CHECK-LABEL: stofp_v4i64_v4f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf v0.2d, v0.2d
+; CHECK-NEXT:    scvtf v1.2d, v1.2d
+; CHECK-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-NEXT:    fcvtn2 v0.4s, v1.2d
+; CHECK-NEXT:    fcvtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
 entry:
   %c = sitofp <4 x i64> %a to <4 x half>
   ret <4 x half> %c
 }
 
 define <4 x half> @utofp_v4i64_v4f16(<4 x i64> %a) {
-; CHECK-SD-LABEL: utofp_v4i64_v4f16:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-SD-NEXT:    ucvtf v1.2d, v1.2d
-; CHECK-SD-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-SD-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-SD-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-NOFP16-LABEL: utofp_v4i64_v4f16:
-; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v1.2d, v1.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT:    ret
-;
-; CHECK-GI-FP16-LABEL: utofp_v4i64_v4f16:
-; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT:    ucvtf v1.2d, v1.2d
-; CHECK-GI-FP16-NEXT:    mov d2, v0.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h0, d0
-; CHECK-GI-FP16-NEXT:    mov d3, v1.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h1, d1
-; CHECK-GI-FP16-NEXT:    fcvt h2, d2
-; CHECK-GI-FP16-NEXT:    mov v0.h[1], v2.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h2, d3
-; CHECK-GI-FP16-NEXT:    mov v0.h[2], v1.h[0]
-; CHECK-GI-FP16-NEXT:    mov v0.h[3], v2.h[0]
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-FP16-NEXT:    ret
+; CHECK-LABEL: utofp_v4i64_v4f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NEXT:    ucvtf v1.2d, v1.2d
+; CHECK-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-NEXT:    fcvtn2 v0.4s, v1.2d
+; CHECK-NEXT:    fcvtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
 entry:
   %c = uitofp <4 x i64> %a to <4 x half>
   ret <4 x half> %c
@@ -6453,46 +6629,19 @@ define <8 x half> @stofp_v8i64_v8f16(<8 x i64> %a) {
 ; CHECK-SD-NEXT:    fcvtn2 v0.8h, v2.4s
 ; CHECK-SD-NEXT:    ret
 ;
-; CHECK-GI-NOFP16-LABEL: stofp_v8i64_v8f16:
-; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v1.2d, v1.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v2.2d, v2.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v3.2d, v3.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v2.2s, v2.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v2.4s, v3.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.8h, v2.4s
-; CHECK-GI-NOFP16-NEXT:    ret
-;
-; CHECK-GI-FP16-LABEL: stofp_v8i64_v8f16:
-; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT:    scvtf v1.2d, v1.2d
-; CHECK-GI-FP16-NEXT:    scvtf v2.2d, v2.2d
-; CHECK-GI-FP16-NEXT:    scvtf v3.2d, v3.2d
-; CHECK-GI-FP16-NEXT:    mov d4, v0.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h0, d0
-; CHECK-GI-FP16-NEXT:    mov d5, v1.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h1, d1
-; CHECK-GI-FP16-NEXT:    fcvt h4, d4
-; CHECK-GI-FP16-NEXT:    mov v0.h[1], v4.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h4, d5
-; CHECK-GI-FP16-NEXT:    mov v0.h[2], v1.h[0]
-; CHECK-GI-FP16-NEXT:    mov d1, v2.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h2, d2
-; CHECK-GI-FP16-NEXT:    mov v0.h[3], v4.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h1, d1
-; CHECK-GI-FP16-NEXT:    mov v0.h[4], v2.h[0]
-; CHECK-GI-FP16-NEXT:    mov d2, v3.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h3, d3
-; CHECK-GI-FP16-NEXT:    mov v0.h[5], v1.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h1, d2
-; CHECK-GI-FP16-NEXT:    mov v0.h[6], v3.h[0]
-; CHECK-GI-FP16-NEXT:    mov v0.h[7], v1.h[0]
-; CHECK-GI-FP16-NEXT:    ret
+; CHECK-GI-LABEL: stofp_v8i64_v8f16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    scvtf v0.2d, v0.2d
+; CHECK-GI-NEXT:    scvtf v1.2d, v1.2d
+; CHECK-GI-NEXT:    scvtf v2.2d, v2.2d
+; CHECK-GI-NEXT:    scvtf v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    fcvtn v2.2s, v2.2d
+; CHECK-GI-NEXT:    fcvtn2 v0.4s, v1.2d
+; CHECK-GI-NEXT:    fcvtn2 v2.4s, v3.2d
+; CHECK-GI-NEXT:    fcvtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    fcvtn2 v0.8h, v2.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = sitofp <8 x i64> %a to <8 x half>
   ret <8 x half> %c
@@ -6513,46 +6662,19 @@ define <8 x half> @utofp_v8i64_v8f16(<8 x i64> %a) {
 ; CHECK-SD-NEXT:    fcvtn2 v0.8h, v2.4s
 ; CHECK-SD-NEXT:    ret
 ;
-; CHECK-GI-NOFP16-LABEL: utofp_v8i64_v8f16:
-; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v1.2d, v1.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v2.2d, v2.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v3.2d, v3.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v2.2s, v2.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v2.4s, v3.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.8h, v2.4s
-; CHECK-GI-NOFP16-NEXT:    ret
-;
-; CHECK-GI-FP16-LABEL: utofp_v8i64_v8f16:
-; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT:    ucvtf v1.2d, v1.2d
-; CHECK-GI-FP16-NEXT:    ucvtf v2.2d, v2.2d
-; CHECK-GI-FP16-NEXT:    ucvtf v3.2d, v3.2d
-; CHECK-GI-FP16-NEXT:    mov d4, v0.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h0, d0
-; CHECK-GI-FP16-NEXT:    mov d5, v1.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h1, d1
-; CHECK-GI-FP16-NEXT:    fcvt h4, d4
-; CHECK-GI-FP16-NEXT:    mov v0.h[1], v4.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h4, d5
-; CHECK-GI-FP16-NEXT:    mov v0.h[2], v1.h[0]
-; CHECK-GI-FP16-NEXT:    mov d1, v2.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h2, d2
-; CHECK-GI-FP16-NEXT:    mov v0.h[3], v4.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h1, d1
-; CHECK-GI-FP16-NEXT:    mov v0.h[4], v2.h[0]
-; CHECK-GI-FP16-NEXT:    mov d2, v3.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h3, d3
-; CHECK-GI-FP16-NEXT:    mov v0.h[5], v1.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h1, d2
-; CHECK-GI-FP16-NEXT:    mov v0.h[6], v3.h[0]
-; CHECK-GI-FP16-NEXT:    mov v0.h[7], v1.h[0]
-; CHECK-GI-FP16-NEXT:    ret
+; CHECK-GI-LABEL: utofp_v8i64_v8f16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-GI-NEXT:    ucvtf v1.2d, v1.2d
+; CHECK-GI-NEXT:    ucvtf v2.2d, v2.2d
+; CHECK-GI-NEXT:    ucvtf v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    fcvtn v2.2s, v2.2d
+; CHECK-GI-NEXT:    fcvtn2 v0.4s, v1.2d
+; CHECK-GI-NEXT:    fcvtn2 v2.4s, v3.2d
+; CHECK-GI-NEXT:    fcvtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    fcvtn2 v0.8h, v2.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = uitofp <8 x i64> %a to <8 x half>
   ret <8 x half> %c
@@ -6583,79 +6705,29 @@ define <16 x half> @stofp_v16i64_v16f16(<16 x i64> %a) {
 ; CHECK-SD-NEXT:    fcvtn2 v1.8h, v6.4s
 ; CHECK-SD-NEXT:    ret
 ;
-; CHECK-GI-NOFP16-LABEL: stofp_v16i64_v16f16:
-; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v4.2d, v4.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v1.2d, v1.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v2.2d, v2.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v5.2d, v5.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v6.2d, v6.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v3.2d, v3.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v7.2d, v7.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v4.2s, v4.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v2.2s, v2.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v6.2s, v6.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v4.4s, v5.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v2.4s, v3.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v6.4s, v7.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v1.4h, v4.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.8h, v2.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v1.8h, v6.4s
-; CHECK-GI-NOFP16-NEXT:    ret
-;
-; CHECK-GI-FP16-LABEL: stofp_v16i64_v16f16:
-; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT:    scvtf v4.2d, v4.2d
-; CHECK-GI-FP16-NEXT:    scvtf v18.2d, v1.2d
-; CHECK-GI-FP16-NEXT:    scvtf v5.2d, v5.2d
-; CHECK-GI-FP16-NEXT:    scvtf v2.2d, v2.2d
-; CHECK-GI-FP16-NEXT:    scvtf v3.2d, v3.2d
-; CHECK-GI-FP16-NEXT:    mov d16, v0.d[1]
-; CHECK-GI-FP16-NEXT:    mov d17, v4.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h0, d0
-; CHECK-GI-FP16-NEXT:    fcvt h1, d4
-; CHECK-GI-FP16-NEXT:    mov d19, v5.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h5, d5
-; CHECK-GI-FP16-NEXT:    fcvt h16, d16
-; CHECK-GI-FP16-NEXT:    fcvt h4, d17
-; CHECK-GI-FP16-NEXT:    mov d17, v18.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h18, d18
-; CHECK-GI-FP16-NEXT:    mov v0.h[1], v16.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[1], v4.h[0]
-; CHECK-GI-FP16-NEXT:    scvtf v4.2d, v6.2d
-; CHECK-GI-FP16-NEXT:    fcvt h6, d17
-; CHECK-GI-FP16-NEXT:    fcvt h16, d19
-; CHECK-GI-FP16-NEXT:    mov v0.h[2], v18.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[2], v5.h[0]
-; CHECK-GI-FP16-NEXT:    mov d5, v2.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h2, d2
-; CHECK-GI-FP16-NEXT:    mov d17, v4.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h4, d4
-; CHECK-GI-FP16-NEXT:    mov v0.h[3], v6.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[3], v16.h[0]
-; CHECK-GI-FP16-NEXT:    scvtf v6.2d, v7.2d
-; CHECK-GI-FP16-NEXT:    fcvt h5, d5
-; CHECK-GI-FP16-NEXT:    fcvt h7, d17
-; CHECK-GI-FP16-NEXT:    mov v0.h[4], v2.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT:    mov d2, v3.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h3, d3
-; CHECK-GI-FP16-NEXT:    mov d4, v6.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h6, d6
-; CHECK-GI-FP16-NEXT:    mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[5], v7.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h2, d2
-; CHECK-GI-FP16-NEXT:    fcvt h4, d4
-; CHECK-GI-FP16-NEXT:    mov v0.h[6], v3.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT:    mov v0.h[7], v2.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[7], v4.h[0]
-; CHECK-GI-FP16-NEXT:    ret
+; CHECK-GI-LABEL: stofp_v16i64_v16f16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    scvtf v0.2d, v0.2d
+; CHECK-GI-NEXT:    scvtf v4.2d, v4.2d
+; CHECK-GI-NEXT:    scvtf v1.2d, v1.2d
+; CHECK-GI-NEXT:    scvtf v2.2d, v2.2d
+; CHECK-GI-NEXT:    scvtf v5.2d, v5.2d
+; CHECK-GI-NEXT:    scvtf v6.2d, v6.2d
+; CHECK-GI-NEXT:    scvtf v3.2d, v3.2d
+; CHECK-GI-NEXT:    scvtf v7.2d, v7.2d
+; CHECK-GI-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    fcvtn v4.2s, v4.2d
+; CHECK-GI-NEXT:    fcvtn v2.2s, v2.2d
+; CHECK-GI-NEXT:    fcvtn v6.2s, v6.2d
+; CHECK-GI-NEXT:    fcvtn2 v0.4s, v1.2d
+; CHECK-GI-NEXT:    fcvtn2 v4.4s, v5.2d
+; CHECK-GI-NEXT:    fcvtn2 v2.4s, v3.2d
+; CHECK-GI-NEXT:    fcvtn2 v6.4s, v7.2d
+; CHECK-GI-NEXT:    fcvtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    fcvtn v1.4h, v4.4s
+; CHECK-GI-NEXT:    fcvtn2 v0.8h, v2.4s
+; CHECK-GI-NEXT:    fcvtn2 v1.8h, v6.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = sitofp <16 x i64> %a to <16 x half>
   ret <16 x half> %c
@@ -6686,79 +6758,29 @@ define <16 x half> @utofp_v16i64_v16f16(<16 x i64> %a) {
 ; CHECK-SD-NEXT:    fcvtn2 v1.8h, v6.4s
 ; CHECK-SD-NEXT:    ret
 ;
-; CHECK-GI-NOFP16-LABEL: utofp_v16i64_v16f16:
-; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v4.2d, v4.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v1.2d, v1.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v2.2d, v2.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v5.2d, v5.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v6.2d, v6.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v3.2d, v3.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v7.2d, v7.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v4.2s, v4.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v2.2s, v2.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v6.2s, v6.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v4.4s, v5.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v2.4s, v3.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v6.4s, v7.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v1.4h, v4.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.8h, v2.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v1.8h, v6.4s
-; CHECK-GI-NOFP16-NEXT:    ret
-;
-; CHECK-GI-FP16-LABEL: utofp_v16i64_v16f16:
-; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT:    ucvtf v4.2d, v4.2d
-; CHECK-GI-FP16-NEXT:    ucvtf v18.2d, v1.2d
-; CHECK-GI-FP16-NEXT:    ucvtf v5.2d, v5.2d
-; CHECK-GI-FP16-NEXT:    ucvtf v2.2d, v2.2d
-; CHECK-GI-FP16-NEXT:    ucvtf v3.2d, v3.2d
-; CHECK-GI-FP16-NEXT:    mov d16, v0.d[1]
-; CHECK-GI-FP16-NEXT:    mov d17, v4.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h0, d0
-; CHECK-GI-FP16-NEXT:    fcvt h1, d4
-; CHECK-GI-FP16-NEXT:    mov d19, v5.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h5, d5
-; CHECK-GI-FP16-NEXT:    fcvt h16, d16
-; CHECK-GI-FP16-NEXT:    fcvt h4, d17
-; CHECK-GI-FP16-NEXT:    mov d17, v18.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h18, d18
-; CHECK-GI-FP16-NEXT:    mov v0.h[1], v16.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[1], v4.h[0]
-; CHECK-GI-FP16-NEXT:    ucvtf v4.2d, v6.2d
-; CHECK-GI-FP16-NEXT:    fcvt h6, d17
-; CHECK-GI-FP16-NEXT:    fcvt h16, d19
-; CHECK-GI-FP16-NEXT:    mov v0.h[2], v18.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[2], v5.h[0]
-; CHECK-GI-FP16-NEXT:    mov d5, v2.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h2, d2
-; CHECK-GI-FP16-NEXT:    mov d17, v4.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h4, d4
-; CHECK-GI-FP16-NEXT:    mov v0.h[3], v6.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[3], v16.h[0]
-; CHECK-GI-FP16-NEXT:    ucvtf v6.2d, v7.2d
-; CHECK-GI-FP16-NEXT:    fcvt h5, d5
-; CHECK-GI-FP16-NEXT:    fcvt h7, d17
-; CHECK-GI-FP16-NEXT:    mov v0.h[4], v2.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT:    mov d2, v3.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h3, d3
-; CHECK-GI-FP16-NEXT:    mov d4, v6.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h6, d6
-; CHECK-GI-FP16-NEXT:    mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[5], v7.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h2, d2
-; CHECK-GI-FP16-NEXT:    fcvt h4, d4
-; CHECK-GI-FP16-NEXT:    mov v0.h[6], v3.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT:    mov v0.h[7], v2.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[7], v4.h[0]
-; CHECK-GI-FP16-NEXT:    ret
+; CHECK-GI-LABEL: utofp_v16i64_v16f16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-GI-NEXT:    ucvtf v4.2d, v4.2d
+; CHECK-GI-NEXT:    ucvtf v1.2d, v1.2d
+; CHECK-GI-NEXT:    ucvtf v2.2d, v2.2d
+; CHECK-GI-NEXT:    ucvtf v5.2d, v5.2d
+; CHECK-GI-NEXT:    ucvtf v6.2d, v6.2d
+; CHECK-GI-NEXT:    ucvtf v3.2d, v3.2d
+; CHECK-GI-NEXT:    ucvtf v7.2d, v7.2d
+; CHECK-GI-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    fcvtn v4.2s, v4.2d
+; CHECK-GI-NEXT:    fcvtn v2.2s, v2.2d
+; CHECK-GI-NEXT:    fcvtn v6.2s, v6.2d
+; CHECK-GI-NEXT:    fcvtn2 v0.4s, v1.2d
+; CHECK-GI-NEXT:    fcvtn2 v4.4s, v5.2d
+; CHECK-GI-NEXT:    fcvtn2 v2.4s, v3.2d
+; CHECK-GI-NEXT:    fcvtn2 v6.4s, v7.2d
+; CHECK-GI-NEXT:    fcvtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    fcvtn v1.4h, v4.4s
+; CHECK-GI-NEXT:    fcvtn2 v0.8h, v2.4s
+; CHECK-GI-NEXT:    fcvtn2 v1.8h, v6.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = uitofp <16 x i64> %a to <16 x half>
   ret <16 x half> %c
@@ -6813,155 +6835,53 @@ define <32 x half> @stofp_v32i64_v32f16(<32 x i64> %a) {
 ; CHECK-SD-NEXT:    fcvtn2 v3.8h, v7.4s
 ; CHECK-SD-NEXT:    ret
 ;
-; CHECK-GI-NOFP16-LABEL: stofp_v32i64_v32f16:
-; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    ldp q16, q17, [sp]
-; CHECK-GI-NOFP16-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-GI-NOFP16-NEXT:    ldp q20, q21, [sp, #64]
-; CHECK-GI-NOFP16-NEXT:    scvtf v4.2d, v4.2d
-; CHECK-GI-NOFP16-NEXT:    ldp q18, q19, [sp, #32]
-; CHECK-GI-NOFP16-NEXT:    scvtf v1.2d, v1.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v16.2d, v16.2d
-; CHECK-GI-NOFP16-NEXT:    ldp q22, q23, [sp, #96]
-; CHECK-GI-NOFP16-NEXT:    scvtf v20.2d, v20.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v2.2d, v2.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v5.2d, v5.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v6.2d, v6.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v17.2d, v17.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v18.2d, v18.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v21.2d, v21.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v22.2d, v22.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v4.2s, v4.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v16.2s, v16.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v3.2d, v3.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v20.2s, v20.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v7.2d, v7.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v19.2d, v19.2d
-; CHECK-GI-NOFP16-NEXT:    scvtf v23.2d, v23.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v24.2s, v2.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v6.2s, v6.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v18.2s, v18.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v22.2s, v22.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v4.4s, v5.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v16.4s, v17.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v20.4s, v21.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v24.4s, v3.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v6.4s, v7.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v18.4s, v19.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v22.4s, v23.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v1.4h, v4.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v2.4h, v16.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v3.4h, v20.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.8h, v24.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v1.8h, v6.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v2.8h, v18.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v3.8h, v22.4s
-; CHECK-GI-NOFP16-NEXT:    ret
-;
-; CHECK-GI-FP16-LABEL: stofp_v32i64_v32f16:
-; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    ldp q16, q18, [sp]
-; CHECK-GI-FP16-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT:    ldp q17, q19, [sp, #64]
-; CHECK-GI-FP16-NEXT:    scvtf v4.2d, v4.2d
-; CHECK-GI-FP16-NEXT:    scvtf v1.2d, v1.2d
-; CHECK-GI-FP16-NEXT:    scvtf v5.2d, v5.2d
-; CHECK-GI-FP16-NEXT:    scvtf v6.2d, v6.2d
-; CHECK-GI-FP16-NEXT:    scvtf v20.2d, v16.2d
-; CHECK-GI-FP16-NEXT:    scvtf v24.2d, v18.2d
-; CHECK-GI-FP16-NEXT:    scvtf v2.2d, v2.2d
-; CHECK-GI-FP16-NEXT:    scvtf v16.2d, v17.2d
-; CHECK-GI-FP16-NEXT:    mov d21, v0.d[1]
-; CHECK-GI-FP16-NEXT:    scvtf v25.2d, v19.2d
-; CHECK-GI-FP16-NEXT:    mov d22, v4.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h0, d0
-; CHECK-GI-FP16-NEXT:    scvtf v3.2d, v3.2d
-; CHECK-GI-FP16-NEXT:    mov d18, v1.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h17, d1
-; CHECK-GI-FP16-NEXT:    mov d19, v5.d[1]
-; CHECK-GI-FP16-NEXT:    mov d23, v20.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h1, d4
-; CHECK-GI-FP16-NEXT:    fcvt h4, d20
-; CHECK-GI-FP16-NEXT:    mov d26, v16.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h20, d5
-; CHECK-GI-FP16-NEXT:    fcvt h5, d16
-; CHECK-GI-FP16-NEXT:    fcvt h28, d21
-; CHECK-GI-FP16-NEXT:    fcvt h29, d22
-; CHECK-GI-FP16-NEXT:    fcvt h22, d24
-; CHECK-GI-FP16-NEXT:    fcvt h21, d25
-; CHECK-GI-FP16-NEXT:    fcvt h18, d18
-; CHECK-GI-FP16-NEXT:    fcvt h19, d19
-; CHECK-GI-FP16-NEXT:    fcvt h27, d23
-; CHECK-GI-FP16-NEXT:    mov d23, v24.d[1]
-; CHECK-GI-FP16-NEXT:    mov d24, v25.d[1]
-; CHECK-GI-FP16-NEXT:    ldp q25, q16, [sp, #32]
-; CHECK-GI-FP16-NEXT:    fcvt h26, d26
-; CHECK-GI-FP16-NEXT:    mov v0.h[1], v28.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[1], v29.h[0]
-; CHECK-GI-FP16-NEXT:    scvtf v7.2d, v7.2d
-; CHECK-GI-FP16-NEXT:    mov v4.h[1], v27.h[0]
-; CHECK-GI-FP16-NEXT:    scvtf v25.2d, v25.2d
-; CHECK-GI-FP16-NEXT:    fcvt h23, d23
-; CHECK-GI-FP16-NEXT:    mov v5.h[1], v26.h[0]
-; CHECK-GI-FP16-NEXT:    ldp q26, q27, [sp, #96]
-; CHECK-GI-FP16-NEXT:    fcvt h24, d24
-; CHECK-GI-FP16-NEXT:    mov v0.h[2], v17.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[2], v20.h[0]
-; CHECK-GI-FP16-NEXT:    mov d20, v6.d[1]
-; CHECK-GI-FP16-NEXT:    mov d17, v2.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h2, d2
-; CHECK-GI-FP16-NEXT:    scvtf v26.2d, v26.2d
-; CHECK-GI-FP16-NEXT:    mov v4.h[2], v22.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h22, d25
-; CHECK-GI-FP16-NEXT:    mov v5.h[2], v21.h[0]
-; CHECK-GI-FP16-NEXT:    mov d21, v25.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h6, d6
-; CHECK-GI-FP16-NEXT:    mov v0.h[3], v18.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[3], v19.h[0]
-; CHECK-GI-FP16-NEXT:    scvtf v16.2d, v16.2d
-; CHECK-GI-FP16-NEXT:    scvtf v18.2d, v27.2d
-; CHECK-GI-FP16-NEXT:    fcvt h19, d20
-; CHECK-GI-FP16-NEXT:    fcvt h17, d17
-; CHECK-GI-FP16-NEXT:    mov d25, v26.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h26, d26
-; CHECK-GI-FP16-NEXT:    mov v4.h[3], v23.h[0]
-; CHECK-GI-FP16-NEXT:    mov v5.h[3], v24.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h20, d21
-; CHECK-GI-FP16-NEXT:    mov v0.h[4], v2.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[4], v6.h[0]
-; CHECK-GI-FP16-NEXT:    mov d2, v3.d[1]
-; CHECK-GI-FP16-NEXT:    mov d23, v18.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h18, d18
-; CHECK-GI-FP16-NEXT:    fcvt h3, d3
-; CHECK-GI-FP16-NEXT:    fcvt h21, d25
-; CHECK-GI-FP16-NEXT:    mov v4.h[4], v22.h[0]
-; CHECK-GI-FP16-NEXT:    mov d22, v16.d[1]
-; CHECK-GI-FP16-NEXT:    mov v5.h[4], v26.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h16, d16
-; CHECK-GI-FP16-NEXT:    mov d6, v7.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h7, d7
-; CHECK-GI-FP16-NEXT:    mov v0.h[5], v17.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[5], v19.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h19, d23
-; CHECK-GI-FP16-NEXT:    fcvt h2, d2
-; CHECK-GI-FP16-NEXT:    mov v4.h[5], v20.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h17, d22
-; CHECK-GI-FP16-NEXT:    mov v5.h[5], v21.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h6, d6
-; CHECK-GI-FP16-NEXT:    mov v0.h[6], v3.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[6], v7.h[0]
-; CHECK-GI-FP16-NEXT:    mov v4.h[6], v16.h[0]
-; CHECK-GI-FP16-NEXT:    mov v5.h[6], v18.h[0]
-; CHECK-GI-FP16-NEXT:    mov v0.h[7], v2.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[7], v6.h[0]
-; CHECK-GI-FP16-NEXT:    mov v4.h[7], v17.h[0]
-; CHECK-GI-FP16-NEXT:    mov v5.h[7], v19.h[0]
-; CHECK-GI-FP16-NEXT:    mov v2.16b, v4.16b
-; CHECK-GI-FP16-NEXT:    mov v3.16b, v5.16b
-; CHECK-GI-FP16-NEXT:    ret
+; CHECK-GI-LABEL: stofp_v32i64_v32f16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldp q16, q17, [sp]
+; CHECK-GI-NEXT:    scvtf v0.2d, v0.2d
+; CHECK-GI-NEXT:    ldp q20, q21, [sp, #64]
+; CHECK-GI-NEXT:    scvtf v4.2d, v4.2d
+; CHECK-GI-NEXT:    ldp q18, q19, [sp, #32]
+; CHECK-GI-NEXT:    scvtf v1.2d, v1.2d
+; CHECK-GI-NEXT:    scvtf v16.2d, v16.2d
+; CHECK-GI-NEXT:    ldp q22, q23, [sp, #96]
+; CHECK-GI-NEXT:    scvtf v20.2d, v20.2d
+; CHECK-GI-NEXT:    scvtf v2.2d, v2.2d
+; CHECK-GI-NEXT:    scvtf v5.2d, v5.2d
+; CHECK-GI-NEXT:    scvtf v6.2d, v6.2d
+; CHECK-GI-NEXT:    scvtf v17.2d, v17.2d
+; CHECK-GI-NEXT:    scvtf v18.2d, v18.2d
+; CHECK-GI-NEXT:    scvtf v21.2d, v21.2d
+; CHECK-GI-NEXT:    scvtf v22.2d, v22.2d
+; CHECK-GI-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    fcvtn v4.2s, v4.2d
+; CHECK-GI-NEXT:    fcvtn v16.2s, v16.2d
+; CHECK-GI-NEXT:    scvtf v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtn v20.2s, v20.2d
+; CHECK-GI-NEXT:    scvtf v7.2d, v7.2d
+; CHECK-GI-NEXT:    scvtf v19.2d, v19.2d
+; CHECK-GI-NEXT:    scvtf v23.2d, v23.2d
+; CHECK-GI-NEXT:    fcvtn v24.2s, v2.2d
+; CHECK-GI-NEXT:    fcvtn v6.2s, v6.2d
+; CHECK-GI-NEXT:    fcvtn v18.2s, v18.2d
+; CHECK-GI-NEXT:    fcvtn v22.2s, v22.2d
+; CHECK-GI-NEXT:    fcvtn2 v0.4s, v1.2d
+; CHECK-GI-NEXT:    fcvtn2 v4.4s, v5.2d
+; CHECK-GI-NEXT:    fcvtn2 v16.4s, v17.2d
+; CHECK-GI-NEXT:    fcvtn2 v20.4s, v21.2d
+; CHECK-GI-NEXT:    fcvtn2 v24.4s, v3.2d
+; CHECK-GI-NEXT:    fcvtn2 v6.4s, v7.2d
+; CHECK-GI-NEXT:    fcvtn2 v18.4s, v19.2d
+; CHECK-GI-NEXT:    fcvtn2 v22.4s, v23.2d
+; CHECK-GI-NEXT:    fcvtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    fcvtn v1.4h, v4.4s
+; CHECK-GI-NEXT:    fcvtn v2.4h, v16.4s
+; CHECK-GI-NEXT:    fcvtn v3.4h, v20.4s
+; CHECK-GI-NEXT:    fcvtn2 v0.8h, v24.4s
+; CHECK-GI-NEXT:    fcvtn2 v1.8h, v6.4s
+; CHECK-GI-NEXT:    fcvtn2 v2.8h, v18.4s
+; CHECK-GI-NEXT:    fcvtn2 v3.8h, v22.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = sitofp <32 x i64> %a to <32 x half>
   ret <32 x half> %c
@@ -7016,155 +6936,53 @@ define <32 x half> @utofp_v32i64_v32f16(<32 x i64> %a) {
 ; CHECK-SD-NEXT:    fcvtn2 v3.8h, v7.4s
 ; CHECK-SD-NEXT:    ret
 ;
-; CHECK-GI-NOFP16-LABEL: utofp_v32i64_v32f16:
-; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    ldp q16, q17, [sp]
-; CHECK-GI-NOFP16-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-GI-NOFP16-NEXT:    ldp q20, q21, [sp, #64]
-; CHECK-GI-NOFP16-NEXT:    ucvtf v4.2d, v4.2d
-; CHECK-GI-NOFP16-NEXT:    ldp q18, q19, [sp, #32]
-; CHECK-GI-NOFP16-NEXT:    ucvtf v1.2d, v1.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v16.2d, v16.2d
-; CHECK-GI-NOFP16-NEXT:    ldp q22, q23, [sp, #96]
-; CHECK-GI-NOFP16-NEXT:    ucvtf v20.2d, v20.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v2.2d, v2.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v5.2d, v5.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v6.2d, v6.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v17.2d, v17.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v18.2d, v18.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v21.2d, v21.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v22.2d, v22.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v4.2s, v4.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v16.2s, v16.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v3.2d, v3.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v20.2s, v20.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v7.2d, v7.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v19.2d, v19.2d
-; CHECK-GI-NOFP16-NEXT:    ucvtf v23.2d, v23.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v24.2s, v2.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v6.2s, v6.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v18.2s, v18.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v22.2s, v22.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v4.4s, v5.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v16.4s, v17.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v20.4s, v21.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v24.4s, v3.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v6.4s, v7.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v18.4s, v19.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v22.4s, v23.2d
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v1.4h, v4.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v2.4h, v16.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v3.4h, v20.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.8h, v24.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v1.8h, v6.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v2.8h, v18.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v3.8h, v22.4s
-; CHECK-GI-NOFP16-NEXT:    ret
-;
-; CHECK-GI-FP16-LABEL: utofp_v32i64_v32f16:
-; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    ldp q16, q18, [sp]
-; CHECK-GI-FP16-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT:    ldp q17, q19, [sp, #64]
-; CHECK-GI-FP16-NEXT:    ucvtf v4.2d, v4.2d
-; CHECK-GI-FP16-NEXT:    ucvtf v1.2d, v1.2d
-; CHECK-GI-FP16-NEXT:    ucvtf v5.2d, v5.2d
-; CHECK-GI-FP16-NEXT:    ucvtf v6.2d, v6.2d
-; CHECK-GI-FP16-NEXT:    ucvtf v20.2d, v16.2d
-; CHECK-GI-FP16-NEXT:    ucvtf v24.2d, v18.2d
-; CHECK-GI-FP16-NEXT:    ucvtf v2.2d, v2.2d
-; CHECK-GI-FP16-NEXT:    ucvtf v16.2d, v17.2d
-; CHECK-GI-FP16-NEXT:    mov d21, v0.d[1]
-; CHECK-GI-FP16-NEXT:    ucvtf v25.2d, v19.2d
-; CHECK-GI-FP16-NEXT:    mov d22, v4.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h0, d0
-; CHECK-GI-FP16-NEXT:    ucvtf v3.2d, v3.2d
-; CHECK-GI-FP16-NEXT:    mov d18, v1.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h17, d1
-; CHECK-GI-FP16-NEXT:    mov d19, v5.d[1]
-; CHECK-GI-FP16-NEXT:    mov d23, v20.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h1, d4
-; CHECK-GI-FP16-NEXT:    fcvt h4, d20
-; CHECK-GI-FP16-NEXT:    mov d26, v16.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h20, d5
-; CHECK-GI-FP16-NEXT:    fcvt h5, d16
-; CHECK-GI-FP16-NEXT:    fcvt h28, d21
-; CHECK-GI-FP16-NEXT:    fcvt h29, d22
-; CHECK-GI-FP16-NEXT:    fcvt h22, d24
-; CHECK-GI-FP16-NEXT:    fcvt h21, d25
-; CHECK-GI-FP16-NEXT:    fcvt h18, d18
-; CHECK-GI-FP16-NEXT:    fcvt h19, d19
-; CHECK-GI-FP16-NEXT:    fcvt h27, d23
-; CHECK-GI-FP16-NEXT:    mov d23, v24.d[1]
-; CHECK-GI-FP16-NEXT:    mov d24, v25.d[1]
-; CHECK-GI-FP16-NEXT:    ldp q25, q16, [sp, #32]
-; CHECK-GI-FP16-NEXT:    fcvt h26, d26
-; CHECK-GI-FP16-NEXT:    mov v0.h[1], v28.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[1], v29.h[0]
-; CHECK-GI-FP16-NEXT:    ucvtf v7.2d, v7.2d
-; CHECK-GI-FP16-NEXT:    mov v4.h[1], v27.h[0]
-; CHECK-GI-FP16-NEXT:    ucvtf v25.2d, v25.2d
-; CHECK-GI-FP16-NEXT:    fcvt h23, d23
-; CHECK-GI-FP16-NEXT:    mov v5.h[1], v26.h[0]
-; CHECK-GI-FP16-NEXT:    ldp q26, q27, [sp, #96]
-; CHECK-GI-FP16-NEXT:    fcvt h24, d24
-; CHECK-GI-FP16-NEXT:    mov v0.h[2], v17.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[2], v20.h[0]
-; CHECK-GI-FP16-NEXT:    mov d20, v6.d[1]
-; CHECK-GI-FP16-NEXT:    mov d17, v2.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h2, d2
-; CHECK-GI-FP16-NEXT:    ucvtf v26.2d, v26.2d
-; CHECK-GI-FP16-NEXT:    mov v4.h[2], v22.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h22, d25
-; CHECK-GI-FP16-NEXT:    mov v5.h[2], v21.h[0]
-; CHECK-GI-FP16-NEXT:    mov d21, v25.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h6, d6
-; CHECK-GI-FP16-NEXT:    mov v0.h[3], v18.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[3], v19.h[0]
-; CHECK-GI-FP16-NEXT:    ucvtf v16.2d, v16.2d
-; CHECK-GI-FP16-NEXT:    ucvtf v18.2d, v27.2d
-; CHECK-GI-FP16-NEXT:    fcvt h19, d20
-; CHECK-GI-FP16-NEXT:    fcvt h17, d17
-; CHECK-GI-FP16-NEXT:    mov d25, v26.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h26, d26
-; CHECK-GI-FP16-NEXT:    mov v4.h[3], v23.h[0]
-; CHECK-GI-FP16-NEXT:    mov v5.h[3], v24.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h20, d21
-; CHECK-GI-FP16-NEXT:    mov v0.h[4], v2.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[4], v6.h[0]
-; CHECK-GI-FP16-NEXT:    mov d2, v3.d[1]
-; CHECK-GI-FP16-NEXT:    mov d23, v18.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h18, d18
-; CHECK-GI-FP16-NEXT:    fcvt h3, d3
-; CHECK-GI-FP16-NEXT:    fcvt h21, d25
-; CHECK-GI-FP16-NEXT:    mov v4.h[4], v22.h[0]
-; CHECK-GI-FP16-NEXT:    mov d22, v16.d[1]
-; CHECK-GI-FP16-NEXT:    mov v5.h[4], v26.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h16, d16
-; CHECK-GI-FP16-NEXT:    mov d6, v7.d[1]
-; CHECK-GI-FP16-NEXT:    fcvt h7, d7
-; CHECK-GI-FP16-NEXT:    mov v0.h[5], v17.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[5], v19.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h19, d23
-; CHECK-GI-FP16-NEXT:    fcvt h2, d2
-; CHECK-GI-FP16-NEXT:    mov v4.h[5], v20.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h17, d22
-; CHECK-GI-FP16-NEXT:    mov v5.h[5], v21.h[0]
-; CHECK-GI-FP16-NEXT:    fcvt h6, d6
-; CHECK-GI-FP16-NEXT:    mov v0.h[6], v3.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[6], v7.h[0]
-; CHECK-GI-FP16-NEXT:    mov v4.h[6], v16.h[0]
-; CHECK-GI-FP16-NEXT:    mov v5.h[6], v18.h[0]
-; CHECK-GI-FP16-NEXT:    mov v0.h[7], v2.h[0]
-; CHECK-GI-FP16-NEXT:    mov v1.h[7], v6.h[0]
-; CHECK-GI-FP16-NEXT:    mov v4.h[7], v17.h[0]
-; CHECK-GI-FP16-NEXT:    mov v5.h[7], v19.h[0]
-; CHECK-GI-FP16-NEXT:    mov v2.16b, v4.16b
-; CHECK-GI-FP16-NEXT:    mov v3.16b, v5.16b
-; CHECK-GI-FP16-NEXT:    ret
+; CHECK-GI-LABEL: utofp_v32i64_v32f16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldp q16, q17, [sp]
+; CHECK-GI-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-GI-NEXT:    ldp q20, q21, [sp, #64]
+; CHECK-GI-NEXT:    ucvtf v4.2d, v4.2d
+; CHECK-GI-NEXT:    ldp q18, q19, [sp, #32]
+; CHECK-GI-NEXT:    ucvtf v1.2d, v1.2d
+; CHECK-GI-NEXT:    ucvtf v16.2d, v16.2d
+; CHECK-GI-NEXT:    ldp q22, q23, [sp, #96]
+; CHECK-GI-NEXT:    ucvtf v20.2d, v20.2d
+; CHECK-GI-NEXT:    ucvtf v2.2d, v2.2d
+; CHECK-GI-NEXT:    ucvtf v5.2d, v5.2d
+; CHECK-GI-NEXT:    ucvtf v6.2d, v6.2d
+; CHECK-GI-NEXT:    ucvtf v17.2d, v17.2d
+; CHECK-GI-NEXT:    ucvtf v18.2d, v18.2d
+; CHECK-GI-NEXT:    ucvtf v21.2d, v21.2d
+; CHECK-GI-NEXT:    ucvtf v22.2d, v22.2d
+; CHECK-GI-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    fcvtn v4.2s, v4.2d
+; CHECK-GI-NEXT:    fcvtn v16.2s, v16.2d
+; CHECK-GI-NEXT:    ucvtf v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtn v20.2s, v20.2d
+; CHECK-GI-NEXT:    ucvtf v7.2d, v7.2d
+; CHECK-GI-NEXT:    ucvtf v19.2d, v19.2d
+; CHECK-GI-NEXT:    ucvtf v23.2d, v23.2d
+; CHECK-GI-NEXT:    fcvtn v24.2s, v2.2d
+; CHECK-GI-NEXT:    fcvtn v6.2s, v6.2d
+; CHECK-GI-NEXT:    fcvtn v18.2s, v18.2d
+; CHECK-GI-NEXT:    fcvtn v22.2s, v22.2d
+; CHECK-GI-NEXT:    fcvtn2 v0.4s, v1.2d
+; CHECK-GI-NEXT:    fcvtn2 v4.4s, v5.2d
+; CHECK-GI-NEXT:    fcvtn2 v16.4s, v17.2d
+; CHECK-GI-NEXT:    fcvtn2 v20.4s, v21.2d
+; CHECK-GI-NEXT:    fcvtn2 v24.4s, v3.2d
+; CHECK-GI-NEXT:    fcvtn2 v6.4s, v7.2d
+; CHECK-GI-NEXT:    fcvtn2 v18.4s, v19.2d
+; CHECK-GI-NEXT:    fcvtn2 v22.4s, v23.2d
+; CHECK-GI-NEXT:    fcvtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    fcvtn v1.4h, v4.4s
+; CHECK-GI-NEXT:    fcvtn v2.4h, v16.4s
+; CHECK-GI-NEXT:    fcvtn v3.4h, v20.4s
+; CHECK-GI-NEXT:    fcvtn2 v0.8h, v24.4s
+; CHECK-GI-NEXT:    fcvtn2 v1.8h, v6.4s
+; CHECK-GI-NEXT:    fcvtn2 v2.8h, v18.4s
+; CHECK-GI-NEXT:    fcvtn2 v3.8h, v22.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = uitofp <32 x i64> %a to <32 x half>
   ret <32 x half> %c



More information about the llvm-commits mailing list