[llvm] 54574d3 - [AArch64][GlobalISel] Expand handling for fptosi and fptoui (#70635)

via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 4 04:47:10 PDT 2023


Author: David Green
Date: 2023-11-04T11:47:05Z
New Revision: 54574d3272b5d0bb35d82fd8f5941703d91ef087

URL: https://github.com/llvm/llvm-project/commit/54574d3272b5d0bb35d82fd8f5941703d91ef087
DIFF: https://github.com/llvm/llvm-project/commit/54574d3272b5d0bb35d82fd8f5941703d91ef087.diff

LOG: [AArch64][GlobalISel] Expand handling for fptosi and fptoui (#70635)

Now that we have more types handled for zext/sext and trunc, it is
possible to get more types working for the vector float to integer
conversions. This patch adds fp16, widening and narrowing vector support
to handle more types. The smaller types will be expanded to the size of
the larger element type. A couple of cases require more awkward truncates
to get working as they go from illegal to illegal types.

Added: 
    

Modified: 
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
    llvm/test/CodeGen/AArch64/fptoi.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 108768494ccbb28..00d9f3f7c30c95f 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5124,7 +5124,9 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
   }
   case TargetOpcode::G_TRUNC:
   case TargetOpcode::G_FPTRUNC:
-  case TargetOpcode::G_FPEXT: {
+  case TargetOpcode::G_FPEXT:
+  case TargetOpcode::G_FPTOSI:
+  case TargetOpcode::G_FPTOUI: {
     if (TypeIdx != 0)
       return UnableToLegalize;
     Observer.changingInstr(MI);

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 598a195d4fb1016..7edfa41d237836a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -643,10 +643,33 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   // Conversions
   getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
       .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
+      .legalIf([=](const LegalityQuery &Query) {
+        return HasFP16 &&
+               (Query.Types[1] == s16 || Query.Types[1] == v4s16 ||
+                Query.Types[1] == v8s16) &&
+               (Query.Types[0] == s32 || Query.Types[0] == s64 ||
+                Query.Types[0] == v4s16 || Query.Types[0] == v8s16);
+      })
       .widenScalarToNextPow2(0)
       .clampScalar(0, s32, s64)
       .widenScalarToNextPow2(1)
-      .clampScalar(1, s32, s64);
+      .clampScalarOrElt(1, MinFPScalar, s64)
+      .moreElementsToNextPow2(0)
+      .widenScalarIf(
+          [=](const LegalityQuery &Query) {
+            return Query.Types[0].getScalarSizeInBits() >
+                   Query.Types[1].getScalarSizeInBits();
+          },
+          LegalizeMutations::changeElementSizeTo(1, 0))
+      .widenScalarIf(
+          [=](const LegalityQuery &Query) {
+            return Query.Types[0].getScalarSizeInBits() <
+                   Query.Types[1].getScalarSizeInBits();
+          },
+          LegalizeMutations::changeElementSizeTo(0, 1))
+      .clampNumElements(0, v4s16, v8s16)
+      .clampNumElements(0, v2s32, v4s32)
+      .clampMaxNumElements(0, s64, 2);
 
   getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
       .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index d5f7507ec5dd767..f7493b128de1e23 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -503,12 +503,12 @@
 # DEBUG-NEXT: .. the first uncovered type index: 2, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 # DEBUG-NEXT: G_FPTOSI (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
-# DEBUG-NEXT: .. the first uncovered type index: 2, OK
-# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_FPTOUI (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
-# DEBUG-NEXT: .. the first uncovered type index: 2, OK
-# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_SITOFP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected

diff  --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll
index c13d9144d2aea31..f30dad966492c12 100644
--- a/llvm/test/CodeGen/AArch64/fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/fptoi.ll
@@ -4,140 +4,10 @@
 ; RUN: llc -mtriple=aarch64-none-eabi -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
 ; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
 
-; CHECK-GI:       warning: Instruction selection used fallback path for fptos_v3f64_v3i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f64_v3i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v4f64_v4i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v4f64_v4i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v8f64_v8i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v8f64_v8i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v16f64_v16i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v16f64_v16i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v32f64_v32i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v32f64_v32i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v3f64_v3i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f64_v3i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v4f64_v4i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v4f64_v4i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v8f64_v8i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v8f64_v8i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v16f64_v16i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v16f64_v16i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v32f64_v32i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v32f64_v32i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v2f64_v2i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v2f64_v2i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v3f64_v3i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f64_v3i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v4f64_v4i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v4f64_v4i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v8f64_v8i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v8f64_v8i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v16f64_v16i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v16f64_v16i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v32f64_v32i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v32f64_v32i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v2f64_v2i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v2f64_v2i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v3f64_v3i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f64_v3i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v4f64_v4i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v4f64_v4i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v8f64_v8i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v8f64_v8i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v16f64_v16i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v16f64_v16i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v32f64_v32i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v32f64_v32i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v3f32_v3i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f32_v3i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v4f32_v4i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v4f32_v4i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v8f32_v8i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v8f32_v8i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v16f32_v16i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v16f32_v16i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v32f32_v32i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v32f32_v32i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v3f32_v3i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f32_v3i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v8f32_v8i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v8f32_v8i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v16f32_v16i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v16f32_v16i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v32f32_v32i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v32f32_v32i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v2f32_v2i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v2f32_v2i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v3f32_v3i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f32_v3i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v4f32_v4i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v4f32_v4i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v8f32_v8i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v8f32_v8i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v16f32_v16i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v16f32_v16i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v32f32_v32i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v32f32_v32i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v2f32_v2i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v2f32_v2i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v3f32_v3i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f32_v3i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v4f32_v4i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v4f32_v4i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v8f32_v8i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v8f32_v8i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v16f32_v16i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v16f32_v16i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v32f32_v32i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v32f32_v32i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v2f16_v2i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v2f16_v2i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v3f16_v3i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f16_v3i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v4f16_v4i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v4f16_v4i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v8f16_v8i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v8f16_v8i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v16f16_v16i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v16f16_v16i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v32f16_v32i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v32f16_v32i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v2f16_v2i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v2f16_v2i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v3f16_v3i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f16_v3i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v4f16_v4i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v4f16_v4i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v8f16_v8i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v8f16_v8i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v16f16_v16i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v16f16_v16i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v32f16_v32i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v32f16_v32i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v2f16_v2i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v2f16_v2i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v3f16_v3i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f16_v3i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v4f16_v4i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v4f16_v4i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v8f16_v8i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v8f16_v8i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v16f16_v16i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v16f16_v16i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v32f16_v32i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v32f16_v32i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v2f16_v2i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v2f16_v2i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v3f16_v3i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f16_v3i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v4f16_v4i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v4f16_v4i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v8f16_v8i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v8f16_v8i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v16f16_v16i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v16f16_v16i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v32f16_v32i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v32f16_v32i8
+; CHECK-GI-FP16:       warning: Instruction selection used fallback path for fptos_v2f16_v2i16
+; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for fptou_v2f16_v2i16
+; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for fptos_v2f16_v2i8
+; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for fptou_v2f16_v2i8
 
 define i64 @fptos_f64_i64(double %a) {
 ; CHECK-LABEL: fptos_f64_i64:
@@ -331,11 +201,16 @@ define i64 @fptos_f16_i64(half %a) {
 ; CHECK-SD-FP16-NEXT:    fcvtzs x0, h0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-LABEL: fptos_f16_i64:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fcvt s0, h0
-; CHECK-GI-NEXT:    fcvtzs x0, s0
-; CHECK-GI-NEXT:    ret
+; CHECK-GI-NOFP16-LABEL: fptos_f16_i64:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    fcvtzs x0, s0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fptos_f16_i64:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi half %a to i64
   ret i64 %c
@@ -353,11 +228,16 @@ define i64 @fptou_f16_i64(half %a) {
 ; CHECK-SD-FP16-NEXT:    fcvtzu x0, h0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-LABEL: fptou_f16_i64:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fcvt s0, h0
-; CHECK-GI-NEXT:    fcvtzu x0, s0
-; CHECK-GI-NEXT:    ret
+; CHECK-GI-NOFP16-LABEL: fptou_f16_i64:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    fcvtzu x0, s0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fptou_f16_i64:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    fcvtzu x0, h0
+; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui half %a to i64
   ret i64 %c
@@ -375,11 +255,16 @@ define i32 @fptos_f16_i32(half %a) {
 ; CHECK-SD-FP16-NEXT:    fcvtzs w0, h0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-LABEL: fptos_f16_i32:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fcvt s0, h0
-; CHECK-GI-NEXT:    fcvtzs w0, s0
-; CHECK-GI-NEXT:    ret
+; CHECK-GI-NOFP16-LABEL: fptos_f16_i32:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    fcvtzs w0, s0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fptos_f16_i32:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    fcvtzs w0, h0
+; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi half %a to i32
   ret i32 %c
@@ -397,11 +282,16 @@ define i32 @fptou_f16_i32(half %a) {
 ; CHECK-SD-FP16-NEXT:    fcvtzu w0, h0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-LABEL: fptou_f16_i32:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fcvt s0, h0
-; CHECK-GI-NEXT:    fcvtzu w0, s0
-; CHECK-GI-NEXT:    ret
+; CHECK-GI-NOFP16-LABEL: fptou_f16_i32:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    fcvtzu w0, s0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fptou_f16_i32:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    fcvtzu w0, h0
+; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui half %a to i32
   ret i32 %c
@@ -419,11 +309,16 @@ define i16 @fptos_f16_i16(half %a) {
 ; CHECK-SD-FP16-NEXT:    fcvtzs w0, h0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-LABEL: fptos_f16_i16:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fcvt s0, h0
-; CHECK-GI-NEXT:    fcvtzs w0, s0
-; CHECK-GI-NEXT:    ret
+; CHECK-GI-NOFP16-LABEL: fptos_f16_i16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    fcvtzs w0, s0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fptos_f16_i16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    fcvtzs w0, h0
+; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi half %a to i16
   ret i16 %c
@@ -441,11 +336,16 @@ define i16 @fptou_f16_i16(half %a) {
 ; CHECK-SD-FP16-NEXT:    fcvtzs w0, h0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-LABEL: fptou_f16_i16:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fcvt s0, h0
-; CHECK-GI-NEXT:    fcvtzu w0, s0
-; CHECK-GI-NEXT:    ret
+; CHECK-GI-NOFP16-LABEL: fptou_f16_i16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    fcvtzu w0, s0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fptou_f16_i16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    fcvtzu w0, h0
+; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui half %a to i16
   ret i16 %c
@@ -463,11 +363,16 @@ define i8 @fptos_f16_i8(half %a) {
 ; CHECK-SD-FP16-NEXT:    fcvtzs w0, h0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-LABEL: fptos_f16_i8:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fcvt s0, h0
-; CHECK-GI-NEXT:    fcvtzs w0, s0
-; CHECK-GI-NEXT:    ret
+; CHECK-GI-NOFP16-LABEL: fptos_f16_i8:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    fcvtzs w0, s0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fptos_f16_i8:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    fcvtzs w0, h0
+; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi half %a to i8
   ret i8 %c
@@ -485,11 +390,16 @@ define i8 @fptou_f16_i8(half %a) {
 ; CHECK-SD-FP16-NEXT:    fcvtzs w0, h0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-LABEL: fptou_f16_i8:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fcvt s0, h0
-; CHECK-GI-NEXT:    fcvtzu w0, s0
-; CHECK-GI-NEXT:    ret
+; CHECK-GI-NOFP16-LABEL: fptou_f16_i8:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    fcvtzu w0, s0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fptou_f16_i8:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    fcvtzu w0, h0
+; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui half %a to i8
   ret i8 %c
@@ -516,38 +426,64 @@ entry:
 }
 
 define <3 x i64> @fptos_v3f64_v3i64(<3 x double> %a) {
-; CHECK-LABEL: fptos_v3f64_v3i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q2
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v3f64_v3i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v3f64_v3i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <3 x double> %a to <3 x i64>
   ret <3 x i64> %c
 }
 
 define <3 x i64> @fptou_v3f64_v3i64(<3 x double> %a) {
-; CHECK-LABEL: fptou_v3f64_v3i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-NEXT:    fcvtzu v2.2d, v2.2d
-; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q2
-; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
-; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v3f64_v3i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v3f64_v3i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <3 x double> %a to <3 x i64>
   ret <3 x i64> %c
@@ -636,76 +572,142 @@ entry:
 }
 
 define <32 x i64> @fptos_v32f64_v32i64(<32 x double> %a) {
-; CHECK-LABEL: fptos_v32f64_v32i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldp q17, q16, [sp, #96]
-; CHECK-NEXT:    fcvtzs v7.2d, v7.2d
-; CHECK-NEXT:    ldp q19, q18, [sp, #64]
-; CHECK-NEXT:    fcvtzs v6.2d, v6.2d
-; CHECK-NEXT:    ldp q21, q20, [sp, #32]
-; CHECK-NEXT:    fcvtzs v5.2d, v5.2d
-; CHECK-NEXT:    fcvtzs v16.2d, v16.2d
-; CHECK-NEXT:    fcvtzs v17.2d, v17.2d
-; CHECK-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-NEXT:    fcvtzs v18.2d, v18.2d
-; CHECK-NEXT:    fcvtzs v19.2d, v19.2d
-; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-NEXT:    fcvtzs v20.2d, v20.2d
-; CHECK-NEXT:    fcvtzs v21.2d, v21.2d
-; CHECK-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    stp q5, q6, [x8, #80]
-; CHECK-NEXT:    str q16, [x8, #240]
-; CHECK-NEXT:    ldp q22, q16, [sp]
-; CHECK-NEXT:    stp q3, q4, [x8, #48]
-; CHECK-NEXT:    stp q20, q19, [x8, #176]
-; CHECK-NEXT:    fcvtzs v16.2d, v16.2d
-; CHECK-NEXT:    stp q1, q2, [x8, #16]
-; CHECK-NEXT:    stp q18, q17, [x8, #208]
-; CHECK-NEXT:    fcvtzs v17.2d, v22.2d
-; CHECK-NEXT:    str q0, [x8]
-; CHECK-NEXT:    stp q16, q21, [x8, #144]
-; CHECK-NEXT:    stp q7, q17, [x8, #112]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v32f64_v32i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ldp q17, q16, [sp, #96]
+; CHECK-SD-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-SD-NEXT:    ldp q19, q18, [sp, #64]
+; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT:    ldp q21, q20, [sp, #32]
+; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-SD-NEXT:    fcvtzs v16.2d, v16.2d
+; CHECK-SD-NEXT:    fcvtzs v17.2d, v17.2d
+; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-SD-NEXT:    fcvtzs v18.2d, v18.2d
+; CHECK-SD-NEXT:    fcvtzs v19.2d, v19.2d
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v20.2d, v20.2d
+; CHECK-SD-NEXT:    fcvtzs v21.2d, v21.2d
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    stp q5, q6, [x8, #80]
+; CHECK-SD-NEXT:    str q16, [x8, #240]
+; CHECK-SD-NEXT:    ldp q22, q16, [sp]
+; CHECK-SD-NEXT:    stp q3, q4, [x8, #48]
+; CHECK-SD-NEXT:    stp q20, q19, [x8, #176]
+; CHECK-SD-NEXT:    fcvtzs v16.2d, v16.2d
+; CHECK-SD-NEXT:    stp q1, q2, [x8, #16]
+; CHECK-SD-NEXT:    stp q18, q17, [x8, #208]
+; CHECK-SD-NEXT:    fcvtzs v17.2d, v22.2d
+; CHECK-SD-NEXT:    str q0, [x8]
+; CHECK-SD-NEXT:    stp q16, q21, [x8, #144]
+; CHECK-SD-NEXT:    stp q7, q17, [x8, #112]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v32f64_v32i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-GI-NEXT:    stp q0, q1, [x8]
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v5.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v6.2d
+; CHECK-GI-NEXT:    str q2, [x8, #32]
+; CHECK-GI-NEXT:    ldp q2, q5, [sp]
+; CHECK-GI-NEXT:    fcvtzs v6.2d, v7.2d
+; CHECK-GI-NEXT:    stp q3, q4, [x8, #48]
+; CHECK-GI-NEXT:    ldp q3, q4, [sp, #32]
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-GI-NEXT:    stp q0, q1, [x8, #80]
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v3.2d
+; CHECK-GI-NEXT:    ldp q1, q3, [sp, #64]
+; CHECK-GI-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-GI-NEXT:    stp q6, q2, [x8, #112]
+; CHECK-GI-NEXT:    ldp q2, q6, [sp, #96]
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-GI-NEXT:    stp q5, q0, [x8, #144]
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v6.2d
+; CHECK-GI-NEXT:    stp q4, q1, [x8, #176]
+; CHECK-GI-NEXT:    stp q3, q2, [x8, #208]
+; CHECK-GI-NEXT:    str q0, [x8, #240]
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <32 x double> %a to <32 x i64>
   ret <32 x i64> %c
 }
 
 define <32 x i64> @fptou_v32f64_v32i64(<32 x double> %a) {
-; CHECK-LABEL: fptou_v32f64_v32i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldp q17, q16, [sp, #96]
-; CHECK-NEXT:    fcvtzu v7.2d, v7.2d
-; CHECK-NEXT:    ldp q19, q18, [sp, #64]
-; CHECK-NEXT:    fcvtzu v6.2d, v6.2d
-; CHECK-NEXT:    ldp q21, q20, [sp, #32]
-; CHECK-NEXT:    fcvtzu v5.2d, v5.2d
-; CHECK-NEXT:    fcvtzu v16.2d, v16.2d
-; CHECK-NEXT:    fcvtzu v17.2d, v17.2d
-; CHECK-NEXT:    fcvtzu v4.2d, v4.2d
-; CHECK-NEXT:    fcvtzu v18.2d, v18.2d
-; CHECK-NEXT:    fcvtzu v19.2d, v19.2d
-; CHECK-NEXT:    fcvtzu v3.2d, v3.2d
-; CHECK-NEXT:    fcvtzu v20.2d, v20.2d
-; CHECK-NEXT:    fcvtzu v21.2d, v21.2d
-; CHECK-NEXT:    fcvtzu v2.2d, v2.2d
-; CHECK-NEXT:    fcvtzu v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
-; CHECK-NEXT:    stp q5, q6, [x8, #80]
-; CHECK-NEXT:    str q16, [x8, #240]
-; CHECK-NEXT:    ldp q22, q16, [sp]
-; CHECK-NEXT:    stp q3, q4, [x8, #48]
-; CHECK-NEXT:    stp q20, q19, [x8, #176]
-; CHECK-NEXT:    fcvtzu v16.2d, v16.2d
-; CHECK-NEXT:    stp q1, q2, [x8, #16]
-; CHECK-NEXT:    stp q18, q17, [x8, #208]
-; CHECK-NEXT:    fcvtzu v17.2d, v22.2d
-; CHECK-NEXT:    str q0, [x8]
-; CHECK-NEXT:    stp q16, q21, [x8, #144]
-; CHECK-NEXT:    stp q7, q17, [x8, #112]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v32f64_v32i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ldp q17, q16, [sp, #96]
+; CHECK-SD-NEXT:    fcvtzu v7.2d, v7.2d
+; CHECK-SD-NEXT:    ldp q19, q18, [sp, #64]
+; CHECK-SD-NEXT:    fcvtzu v6.2d, v6.2d
+; CHECK-SD-NEXT:    ldp q21, q20, [sp, #32]
+; CHECK-SD-NEXT:    fcvtzu v5.2d, v5.2d
+; CHECK-SD-NEXT:    fcvtzu v16.2d, v16.2d
+; CHECK-SD-NEXT:    fcvtzu v17.2d, v17.2d
+; CHECK-SD-NEXT:    fcvtzu v4.2d, v4.2d
+; CHECK-SD-NEXT:    fcvtzu v18.2d, v18.2d
+; CHECK-SD-NEXT:    fcvtzu v19.2d, v19.2d
+; CHECK-SD-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzu v20.2d, v20.2d
+; CHECK-SD-NEXT:    fcvtzu v21.2d, v21.2d
+; CHECK-SD-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-SD-NEXT:    stp q5, q6, [x8, #80]
+; CHECK-SD-NEXT:    str q16, [x8, #240]
+; CHECK-SD-NEXT:    ldp q22, q16, [sp]
+; CHECK-SD-NEXT:    stp q3, q4, [x8, #48]
+; CHECK-SD-NEXT:    stp q20, q19, [x8, #176]
+; CHECK-SD-NEXT:    fcvtzu v16.2d, v16.2d
+; CHECK-SD-NEXT:    stp q1, q2, [x8, #16]
+; CHECK-SD-NEXT:    stp q18, q17, [x8, #208]
+; CHECK-SD-NEXT:    fcvtzu v17.2d, v22.2d
+; CHECK-SD-NEXT:    str q0, [x8]
+; CHECK-SD-NEXT:    stp q16, q21, [x8, #144]
+; CHECK-SD-NEXT:    stp q7, q17, [x8, #112]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v32f64_v32i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzu v4.2d, v4.2d
+; CHECK-GI-NEXT:    stp q0, q1, [x8]
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v5.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v6.2d
+; CHECK-GI-NEXT:    str q2, [x8, #32]
+; CHECK-GI-NEXT:    ldp q2, q5, [sp]
+; CHECK-GI-NEXT:    fcvtzu v6.2d, v7.2d
+; CHECK-GI-NEXT:    stp q3, q4, [x8, #48]
+; CHECK-GI-NEXT:    ldp q3, q4, [sp, #32]
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzu v5.2d, v5.2d
+; CHECK-GI-NEXT:    stp q0, q1, [x8, #80]
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v3.2d
+; CHECK-GI-NEXT:    ldp q1, q3, [sp, #64]
+; CHECK-GI-NEXT:    fcvtzu v4.2d, v4.2d
+; CHECK-GI-NEXT:    stp q6, q2, [x8, #112]
+; CHECK-GI-NEXT:    ldp q2, q6, [sp, #96]
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-GI-NEXT:    stp q5, q0, [x8, #144]
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v6.2d
+; CHECK-GI-NEXT:    stp q4, q1, [x8, #176]
+; CHECK-GI-NEXT:    stp q3, q2, [x8, #208]
+; CHECK-GI-NEXT:    str q0, [x8, #240]
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <32 x double> %a to <32 x i64>
   ret <32 x i64> %c
@@ -766,170 +768,300 @@ entry:
 }
 
 define <4 x i32> @fptos_v4f64_v4i32(<4 x double> %a) {
-; CHECK-LABEL: fptos_v4f64_v4i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v4f64_v4i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v4f64_v4i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <4 x double> %a to <4 x i32>
   ret <4 x i32> %c
 }
 
 define <4 x i32> @fptou_v4f64_v4i32(<4 x double> %a) {
-; CHECK-LABEL: fptou_v4f64_v4i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
-; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v4f64_v4i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-SD-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v4f64_v4i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <4 x double> %a to <4 x i32>
   ret <4 x i32> %c
 }
 
 define <8 x i32> @fptos_v8f64_v8i32(<8 x double> %a) {
-; CHECK-LABEL: fptos_v8f64_v8i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v8f64_v8i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-SD-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v8f64_v8i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <8 x double> %a to <8 x i32>
   ret <8 x i32> %c
 }
 
 define <8 x i32> @fptou_v8f64_v8i32(<8 x double> %a) {
-; CHECK-LABEL: fptou_v8f64_v8i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
-; CHECK-NEXT:    fcvtzu v3.2d, v3.2d
-; CHECK-NEXT:    fcvtzu v2.2d, v2.2d
-; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v8f64_v8i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-SD-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-SD-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v8f64_v8i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <8 x double> %a to <8 x i32>
   ret <8 x i32> %c
 }
 
 define <16 x i32> @fptos_v16f64_v16i32(<16 x double> %a) {
-; CHECK-LABEL: fptos_v16f64_v16i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-NEXT:    fcvtzs v5.2d, v5.2d
-; CHECK-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-NEXT:    fcvtzs v7.2d, v7.2d
-; CHECK-NEXT:    fcvtzs v6.2d, v6.2d
-; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
-; CHECK-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
-; CHECK-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v16f64_v16i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-SD-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-SD-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-SD-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v16f64_v16i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-GI-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-GI-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <16 x double> %a to <16 x i32>
   ret <16 x i32> %c
 }
 
 define <16 x i32> @fptou_v16f64_v16i32(<16 x double> %a) {
-; CHECK-LABEL: fptou_v16f64_v16i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu v3.2d, v3.2d
-; CHECK-NEXT:    fcvtzu v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
-; CHECK-NEXT:    fcvtzu v2.2d, v2.2d
-; CHECK-NEXT:    fcvtzu v5.2d, v5.2d
-; CHECK-NEXT:    fcvtzu v4.2d, v4.2d
-; CHECK-NEXT:    fcvtzu v7.2d, v7.2d
-; CHECK-NEXT:    fcvtzu v6.2d, v6.2d
-; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
-; CHECK-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
-; CHECK-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v16f64_v16i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-SD-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzu v5.2d, v5.2d
+; CHECK-SD-NEXT:    fcvtzu v4.2d, v4.2d
+; CHECK-SD-NEXT:    fcvtzu v7.2d, v7.2d
+; CHECK-SD-NEXT:    fcvtzu v6.2d, v6.2d
+; CHECK-SD-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-SD-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-SD-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v16f64_v16i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzu v4.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzu v5.2d, v5.2d
+; CHECK-GI-NEXT:    fcvtzu v6.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzu v7.2d, v7.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-GI-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <16 x double> %a to <16 x i32>
   ret <16 x i32> %c
 }
 
 define <32 x i32> @fptos_v32f64_v32i32(<32 x double> %a) {
-; CHECK-LABEL: fptos_v32f64_v32i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldp q16, q17, [sp, #96]
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    ldp q18, q19, [sp, #64]
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    ldp q20, q21, [sp, #32]
-; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-NEXT:    ldp q22, q23, [sp]
-; CHECK-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-NEXT:    fcvtzs v5.2d, v5.2d
-; CHECK-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-NEXT:    fcvtzs v7.2d, v7.2d
-; CHECK-NEXT:    fcvtzs v6.2d, v6.2d
-; CHECK-NEXT:    fcvtzs v21.2d, v21.2d
-; CHECK-NEXT:    fcvtzs v20.2d, v20.2d
-; CHECK-NEXT:    fcvtzs v23.2d, v23.2d
-; CHECK-NEXT:    fcvtzs v22.2d, v22.2d
-; CHECK-NEXT:    fcvtzs v19.2d, v19.2d
-; CHECK-NEXT:    fcvtzs v18.2d, v18.2d
-; CHECK-NEXT:    fcvtzs v17.2d, v17.2d
-; CHECK-NEXT:    fcvtzs v16.2d, v16.2d
-; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
-; CHECK-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
-; CHECK-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
-; CHECK-NEXT:    uzp1 v5.4s, v20.4s, v21.4s
-; CHECK-NEXT:    uzp1 v4.4s, v22.4s, v23.4s
-; CHECK-NEXT:    uzp1 v6.4s, v18.4s, v19.4s
-; CHECK-NEXT:    uzp1 v7.4s, v16.4s, v17.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v32f64_v32i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ldp q16, q17, [sp, #96]
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    ldp q18, q19, [sp, #64]
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    ldp q20, q21, [sp, #32]
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-SD-NEXT:    ldp q22, q23, [sp]
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-SD-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT:    fcvtzs v21.2d, v21.2d
+; CHECK-SD-NEXT:    fcvtzs v20.2d, v20.2d
+; CHECK-SD-NEXT:    fcvtzs v23.2d, v23.2d
+; CHECK-SD-NEXT:    fcvtzs v22.2d, v22.2d
+; CHECK-SD-NEXT:    fcvtzs v19.2d, v19.2d
+; CHECK-SD-NEXT:    fcvtzs v18.2d, v18.2d
+; CHECK-SD-NEXT:    fcvtzs v17.2d, v17.2d
+; CHECK-SD-NEXT:    fcvtzs v16.2d, v16.2d
+; CHECK-SD-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-SD-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-SD-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-SD-NEXT:    uzp1 v5.4s, v20.4s, v21.4s
+; CHECK-SD-NEXT:    uzp1 v4.4s, v22.4s, v23.4s
+; CHECK-SD-NEXT:    uzp1 v6.4s, v18.4s, v19.4s
+; CHECK-SD-NEXT:    uzp1 v7.4s, v16.4s, v17.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v32f64_v32i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldp q16, q17, [sp]
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    ldp q18, q19, [sp, #32]
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    ldp q20, q21, [sp, #64]
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    ldp q22, q23, [sp, #96]
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-GI-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-GI-NEXT:    fcvtzs v16.2d, v16.2d
+; CHECK-GI-NEXT:    fcvtzs v17.2d, v17.2d
+; CHECK-GI-NEXT:    fcvtzs v18.2d, v18.2d
+; CHECK-GI-NEXT:    fcvtzs v19.2d, v19.2d
+; CHECK-GI-NEXT:    fcvtzs v20.2d, v20.2d
+; CHECK-GI-NEXT:    fcvtzs v21.2d, v21.2d
+; CHECK-GI-NEXT:    fcvtzs v22.2d, v22.2d
+; CHECK-GI-NEXT:    fcvtzs v23.2d, v23.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-GI-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-GI-NEXT:    uzp1 v4.4s, v16.4s, v17.4s
+; CHECK-GI-NEXT:    uzp1 v5.4s, v18.4s, v19.4s
+; CHECK-GI-NEXT:    uzp1 v6.4s, v20.4s, v21.4s
+; CHECK-GI-NEXT:    uzp1 v7.4s, v22.4s, v23.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <32 x double> %a to <32 x i32>
   ret <32 x i32> %c
 }
 
 define <32 x i32> @fptou_v32f64_v32i32(<32 x double> %a) {
-; CHECK-LABEL: fptou_v32f64_v32i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldp q16, q17, [sp, #96]
-; CHECK-NEXT:    fcvtzu v1.2d, v1.2d
-; CHECK-NEXT:    ldp q18, q19, [sp, #64]
-; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
-; CHECK-NEXT:    ldp q20, q21, [sp, #32]
-; CHECK-NEXT:    fcvtzu v3.2d, v3.2d
-; CHECK-NEXT:    ldp q22, q23, [sp]
-; CHECK-NEXT:    fcvtzu v2.2d, v2.2d
-; CHECK-NEXT:    fcvtzu v5.2d, v5.2d
-; CHECK-NEXT:    fcvtzu v4.2d, v4.2d
-; CHECK-NEXT:    fcvtzu v7.2d, v7.2d
-; CHECK-NEXT:    fcvtzu v6.2d, v6.2d
-; CHECK-NEXT:    fcvtzu v21.2d, v21.2d
-; CHECK-NEXT:    fcvtzu v20.2d, v20.2d
-; CHECK-NEXT:    fcvtzu v23.2d, v23.2d
-; CHECK-NEXT:    fcvtzu v22.2d, v22.2d
-; CHECK-NEXT:    fcvtzu v19.2d, v19.2d
-; CHECK-NEXT:    fcvtzu v18.2d, v18.2d
-; CHECK-NEXT:    fcvtzu v17.2d, v17.2d
-; CHECK-NEXT:    fcvtzu v16.2d, v16.2d
-; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
-; CHECK-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
-; CHECK-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
-; CHECK-NEXT:    uzp1 v5.4s, v20.4s, v21.4s
-; CHECK-NEXT:    uzp1 v4.4s, v22.4s, v23.4s
-; CHECK-NEXT:    uzp1 v6.4s, v18.4s, v19.4s
-; CHECK-NEXT:    uzp1 v7.4s, v16.4s, v17.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v32f64_v32i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ldp q16, q17, [sp, #96]
+; CHECK-SD-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-SD-NEXT:    ldp q18, q19, [sp, #64]
+; CHECK-SD-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-SD-NEXT:    ldp q20, q21, [sp, #32]
+; CHECK-SD-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-SD-NEXT:    ldp q22, q23, [sp]
+; CHECK-SD-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzu v5.2d, v5.2d
+; CHECK-SD-NEXT:    fcvtzu v4.2d, v4.2d
+; CHECK-SD-NEXT:    fcvtzu v7.2d, v7.2d
+; CHECK-SD-NEXT:    fcvtzu v6.2d, v6.2d
+; CHECK-SD-NEXT:    fcvtzu v21.2d, v21.2d
+; CHECK-SD-NEXT:    fcvtzu v20.2d, v20.2d
+; CHECK-SD-NEXT:    fcvtzu v23.2d, v23.2d
+; CHECK-SD-NEXT:    fcvtzu v22.2d, v22.2d
+; CHECK-SD-NEXT:    fcvtzu v19.2d, v19.2d
+; CHECK-SD-NEXT:    fcvtzu v18.2d, v18.2d
+; CHECK-SD-NEXT:    fcvtzu v17.2d, v17.2d
+; CHECK-SD-NEXT:    fcvtzu v16.2d, v16.2d
+; CHECK-SD-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-SD-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-SD-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-SD-NEXT:    uzp1 v5.4s, v20.4s, v21.4s
+; CHECK-SD-NEXT:    uzp1 v4.4s, v22.4s, v23.4s
+; CHECK-SD-NEXT:    uzp1 v6.4s, v18.4s, v19.4s
+; CHECK-SD-NEXT:    uzp1 v7.4s, v16.4s, v17.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v32f64_v32i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldp q16, q17, [sp]
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    ldp q18, q19, [sp, #32]
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    ldp q20, q21, [sp, #64]
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    ldp q22, q23, [sp, #96]
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzu v4.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzu v5.2d, v5.2d
+; CHECK-GI-NEXT:    fcvtzu v6.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzu v7.2d, v7.2d
+; CHECK-GI-NEXT:    fcvtzu v16.2d, v16.2d
+; CHECK-GI-NEXT:    fcvtzu v17.2d, v17.2d
+; CHECK-GI-NEXT:    fcvtzu v18.2d, v18.2d
+; CHECK-GI-NEXT:    fcvtzu v19.2d, v19.2d
+; CHECK-GI-NEXT:    fcvtzu v20.2d, v20.2d
+; CHECK-GI-NEXT:    fcvtzu v21.2d, v21.2d
+; CHECK-GI-NEXT:    fcvtzu v22.2d, v22.2d
+; CHECK-GI-NEXT:    fcvtzu v23.2d, v23.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-GI-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-GI-NEXT:    uzp1 v4.4s, v16.4s, v17.4s
+; CHECK-GI-NEXT:    uzp1 v5.4s, v18.4s, v19.4s
+; CHECK-GI-NEXT:    uzp1 v6.4s, v20.4s, v21.4s
+; CHECK-GI-NEXT:    uzp1 v7.4s, v22.4s, v23.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <32 x double> %a to <32 x i32>
   ret <32 x i32> %c
@@ -947,309 +1079,485 @@ entry:
 }
 
 define <2 x i16> @fptou_v2f64_v2i16(<2 x double> %a) {
-; CHECK-LABEL: fptou_v2f64_v2i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v2f64_v2i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v2f64_v2i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <2 x double> %a to <2 x i16>
   ret <2 x i16> %c
 }
 
 define <3 x i16> @fptos_v3f64_v3i16(<3 x double> %a) {
-; CHECK-LABEL: fptos_v3f64_v3i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-NEXT:    fcvtzs v1.2d, v2.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    xtn v1.2s, v1.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v3f64_v3i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v3f64_v3i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <3 x double> %a to <3 x i16>
   ret <3 x i16> %c
 }
 
 define <3 x i16> @fptou_v3f64_v3i16(<3 x double> %a) {
-; CHECK-LABEL: fptou_v3f64_v3i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-NEXT:    fcvtzs v1.2d, v2.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    xtn v1.2s, v1.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v3f64_v3i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v3f64_v3i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <3 x double> %a to <3 x i16>
   ret <3 x i16> %c
 }
 
 define <4 x i16> @fptos_v4f64_v4i16(<4 x double> %a) {
-; CHECK-LABEL: fptos_v4f64_v4i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    xtn v1.2s, v1.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v4f64_v4i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v4f64_v4i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <4 x double> %a to <4 x i16>
   ret <4 x i16> %c
 }
 
 define <4 x i16> @fptou_v4f64_v4i16(<4 x double> %a) {
-; CHECK-LABEL: fptou_v4f64_v4i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    xtn v1.2s, v1.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v4f64_v4i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v4f64_v4i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <4 x double> %a to <4 x i16>
   ret <4 x i16> %c
 }
 
 define <8 x i16> @fptos_v8f64_v8i16(<8 x double> %a) {
-; CHECK-LABEL: fptos_v8f64_v8i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-NEXT:    adrp x8, .LCPI54_0
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    xtn v6.2s, v3.2d
-; CHECK-NEXT:    xtn v5.2s, v2.2d
-; CHECK-NEXT:    xtn v4.2s, v1.2d
-; CHECK-NEXT:    xtn v3.2s, v0.2d
-; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI54_0]
-; CHECK-NEXT:    tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v8f64_v8i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-SD-NEXT:    adrp x8, .LCPI54_0
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    xtn v6.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v5.2s, v2.2d
+; CHECK-SD-NEXT:    xtn v4.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v3.2s, v0.2d
+; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI54_0]
+; CHECK-SD-NEXT:    tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v8f64_v8i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <8 x double> %a to <8 x i16>
   ret <8 x i16> %c
 }
 
 define <8 x i16> @fptou_v8f64_v8i16(<8 x double> %a) {
-; CHECK-LABEL: fptou_v8f64_v8i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-NEXT:    adrp x8, .LCPI55_0
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    xtn v6.2s, v3.2d
-; CHECK-NEXT:    xtn v5.2s, v2.2d
-; CHECK-NEXT:    xtn v4.2s, v1.2d
-; CHECK-NEXT:    xtn v3.2s, v0.2d
-; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI55_0]
-; CHECK-NEXT:    tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v8f64_v8i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-SD-NEXT:    adrp x8, .LCPI55_0
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    xtn v6.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v5.2s, v2.2d
+; CHECK-SD-NEXT:    xtn v4.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v3.2s, v0.2d
+; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI55_0]
+; CHECK-SD-NEXT:    tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v8f64_v8i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <8 x double> %a to <8 x i16>
   ret <8 x i16> %c
 }
 
 define <16 x i16> @fptos_v16f64_v16i16(<16 x double> %a) {
-; CHECK-LABEL: fptos_v16f64_v16i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-NEXT:    fcvtzs v7.2d, v7.2d
-; CHECK-NEXT:    adrp x8, .LCPI56_0
-; CHECK-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-NEXT:    fcvtzs v6.2d, v6.2d
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v5.2d, v5.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-NEXT:    xtn v19.2s, v3.2d
-; CHECK-NEXT:    xtn v23.2s, v7.2d
-; CHECK-NEXT:    xtn v18.2s, v2.2d
-; CHECK-NEXT:    xtn v22.2s, v6.2d
-; CHECK-NEXT:    xtn v17.2s, v1.2d
-; CHECK-NEXT:    xtn v21.2s, v5.2d
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI56_0]
-; CHECK-NEXT:    xtn v16.2s, v0.2d
-; CHECK-NEXT:    xtn v20.2s, v4.2d
-; CHECK-NEXT:    tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
-; CHECK-NEXT:    tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v16f64_v16i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-SD-NEXT:    adrp x8, .LCPI56_0
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-SD-NEXT:    xtn v19.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v23.2s, v7.2d
+; CHECK-SD-NEXT:    xtn v18.2s, v2.2d
+; CHECK-SD-NEXT:    xtn v22.2s, v6.2d
+; CHECK-SD-NEXT:    xtn v17.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v21.2s, v5.2d
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI56_0]
+; CHECK-SD-NEXT:    xtn v16.2s, v0.2d
+; CHECK-SD-NEXT:    xtn v20.2s, v4.2d
+; CHECK-SD-NEXT:    tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
+; CHECK-SD-NEXT:    tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v16f64_v16i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-GI-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-GI-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <16 x double> %a to <16 x i16>
   ret <16 x i16> %c
 }
 
 define <16 x i16> @fptou_v16f64_v16i16(<16 x double> %a) {
-; CHECK-LABEL: fptou_v16f64_v16i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-NEXT:    fcvtzs v7.2d, v7.2d
-; CHECK-NEXT:    adrp x8, .LCPI57_0
-; CHECK-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-NEXT:    fcvtzs v6.2d, v6.2d
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v5.2d, v5.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-NEXT:    xtn v19.2s, v3.2d
-; CHECK-NEXT:    xtn v23.2s, v7.2d
-; CHECK-NEXT:    xtn v18.2s, v2.2d
-; CHECK-NEXT:    xtn v22.2s, v6.2d
-; CHECK-NEXT:    xtn v17.2s, v1.2d
-; CHECK-NEXT:    xtn v21.2s, v5.2d
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI57_0]
-; CHECK-NEXT:    xtn v16.2s, v0.2d
-; CHECK-NEXT:    xtn v20.2s, v4.2d
-; CHECK-NEXT:    tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
-; CHECK-NEXT:    tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v16f64_v16i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-SD-NEXT:    adrp x8, .LCPI57_0
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-SD-NEXT:    xtn v19.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v23.2s, v7.2d
+; CHECK-SD-NEXT:    xtn v18.2s, v2.2d
+; CHECK-SD-NEXT:    xtn v22.2s, v6.2d
+; CHECK-SD-NEXT:    xtn v17.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v21.2s, v5.2d
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI57_0]
+; CHECK-SD-NEXT:    xtn v16.2s, v0.2d
+; CHECK-SD-NEXT:    xtn v20.2s, v4.2d
+; CHECK-SD-NEXT:    tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
+; CHECK-SD-NEXT:    tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v16f64_v16i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzu v4.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzu v5.2d, v5.2d
+; CHECK-GI-NEXT:    fcvtzu v6.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzu v7.2d, v7.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-GI-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <16 x double> %a to <16 x i16>
   ret <16 x i16> %c
 }
 
 define <32 x i16> @fptos_v32f64_v32i16(<32 x double> %a) {
-; CHECK-LABEL: fptos_v32f64_v32i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    stp d15, d14, [sp, #-64]! // 16-byte Folded Spill
-; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
-; CHECK-NEXT:    .cfi_offset b8, -8
-; CHECK-NEXT:    .cfi_offset b9, -16
-; CHECK-NEXT:    .cfi_offset b10, -24
-; CHECK-NEXT:    .cfi_offset b11, -32
-; CHECK-NEXT:    .cfi_offset b12, -40
-; CHECK-NEXT:    .cfi_offset b13, -48
-; CHECK-NEXT:    .cfi_offset b14, -56
-; CHECK-NEXT:    .cfi_offset b15, -64
-; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-NEXT:    fcvtzs v18.2d, v2.2d
-; CHECK-NEXT:    adrp x8, .LCPI58_0
-; CHECK-NEXT:    fcvtzs v19.2d, v1.2d
-; CHECK-NEXT:    ldp q20, q21, [sp, #160]
-; CHECK-NEXT:    fcvtzs v22.2d, v0.2d
-; CHECK-NEXT:    ldp q23, q24, [sp, #96]
-; CHECK-NEXT:    fcvtzs v7.2d, v7.2d
-; CHECK-NEXT:    ldp q16, q17, [sp, #128]
-; CHECK-NEXT:    xtn v3.2s, v3.2d
-; CHECK-NEXT:    fcvtzs v21.2d, v21.2d
-; CHECK-NEXT:    fcvtzs v20.2d, v20.2d
-; CHECK-NEXT:    xtn v2.2s, v18.2d
-; CHECK-NEXT:    ldp q18, q25, [sp, #64]
-; CHECK-NEXT:    xtn v1.2s, v19.2d
-; CHECK-NEXT:    fcvtzs v19.2d, v24.2d
-; CHECK-NEXT:    fcvtzs v17.2d, v17.2d
-; CHECK-NEXT:    xtn v0.2s, v22.2d
-; CHECK-NEXT:    fcvtzs v22.2d, v23.2d
-; CHECK-NEXT:    xtn v29.2s, v7.2d
-; CHECK-NEXT:    fcvtzs v7.2d, v25.2d
-; CHECK-NEXT:    fcvtzs v6.2d, v6.2d
-; CHECK-NEXT:    fcvtzs v18.2d, v18.2d
-; CHECK-NEXT:    fcvtzs v16.2d, v16.2d
-; CHECK-NEXT:    fcvtzs v5.2d, v5.2d
-; CHECK-NEXT:    xtn v15.2s, v21.2d
-; CHECK-NEXT:    xtn v11.2s, v19.2d
-; CHECK-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-NEXT:    xtn v14.2s, v20.2d
-; CHECK-NEXT:    xtn v10.2s, v22.2d
-; CHECK-NEXT:    xtn v13.2s, v17.2d
-; CHECK-NEXT:    xtn v9.2s, v7.2d
-; CHECK-NEXT:    xtn v28.2s, v6.2d
-; CHECK-NEXT:    xtn v8.2s, v18.2d
-; CHECK-NEXT:    xtn v12.2s, v16.2d
-; CHECK-NEXT:    xtn v27.2s, v5.2d
-; CHECK-NEXT:    xtn v26.2s, v4.2d
-; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI58_0]
-; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
-; CHECK-NEXT:    tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b
-; CHECK-NEXT:    tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b
-; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b
-; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d15, d14, [sp], #64 // 16-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v32f64_v32i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    stp d15, d14, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT:    .cfi_offset b8, -8
+; CHECK-SD-NEXT:    .cfi_offset b9, -16
+; CHECK-SD-NEXT:    .cfi_offset b10, -24
+; CHECK-SD-NEXT:    .cfi_offset b11, -32
+; CHECK-SD-NEXT:    .cfi_offset b12, -40
+; CHECK-SD-NEXT:    .cfi_offset b13, -48
+; CHECK-SD-NEXT:    .cfi_offset b14, -56
+; CHECK-SD-NEXT:    .cfi_offset b15, -64
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v18.2d, v2.2d
+; CHECK-SD-NEXT:    adrp x8, .LCPI58_0
+; CHECK-SD-NEXT:    fcvtzs v19.2d, v1.2d
+; CHECK-SD-NEXT:    ldp q20, q21, [sp, #160]
+; CHECK-SD-NEXT:    fcvtzs v22.2d, v0.2d
+; CHECK-SD-NEXT:    ldp q23, q24, [sp, #96]
+; CHECK-SD-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-SD-NEXT:    ldp q16, q17, [sp, #128]
+; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v21.2d, v21.2d
+; CHECK-SD-NEXT:    fcvtzs v20.2d, v20.2d
+; CHECK-SD-NEXT:    xtn v2.2s, v18.2d
+; CHECK-SD-NEXT:    ldp q18, q25, [sp, #64]
+; CHECK-SD-NEXT:    xtn v1.2s, v19.2d
+; CHECK-SD-NEXT:    fcvtzs v19.2d, v24.2d
+; CHECK-SD-NEXT:    fcvtzs v17.2d, v17.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v22.2d
+; CHECK-SD-NEXT:    fcvtzs v22.2d, v23.2d
+; CHECK-SD-NEXT:    xtn v29.2s, v7.2d
+; CHECK-SD-NEXT:    fcvtzs v7.2d, v25.2d
+; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT:    fcvtzs v18.2d, v18.2d
+; CHECK-SD-NEXT:    fcvtzs v16.2d, v16.2d
+; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-SD-NEXT:    xtn v15.2s, v21.2d
+; CHECK-SD-NEXT:    xtn v11.2s, v19.2d
+; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-SD-NEXT:    xtn v14.2s, v20.2d
+; CHECK-SD-NEXT:    xtn v10.2s, v22.2d
+; CHECK-SD-NEXT:    xtn v13.2s, v17.2d
+; CHECK-SD-NEXT:    xtn v9.2s, v7.2d
+; CHECK-SD-NEXT:    xtn v28.2s, v6.2d
+; CHECK-SD-NEXT:    xtn v8.2s, v18.2d
+; CHECK-SD-NEXT:    xtn v12.2s, v16.2d
+; CHECK-SD-NEXT:    xtn v27.2s, v5.2d
+; CHECK-SD-NEXT:    xtn v26.2s, v4.2d
+; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI58_0]
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
+; CHECK-SD-NEXT:    tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b
+; CHECK-SD-NEXT:    tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b
+; CHECK-SD-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d15, d14, [sp], #64 // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v32f64_v32i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldp q16, q17, [sp]
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    ldp q18, q19, [sp, #32]
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    ldp q20, q21, [sp, #64]
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    ldp q22, q23, [sp, #96]
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-GI-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-GI-NEXT:    fcvtzs v16.2d, v16.2d
+; CHECK-GI-NEXT:    fcvtzs v17.2d, v17.2d
+; CHECK-GI-NEXT:    fcvtzs v18.2d, v18.2d
+; CHECK-GI-NEXT:    fcvtzs v19.2d, v19.2d
+; CHECK-GI-NEXT:    fcvtzs v20.2d, v20.2d
+; CHECK-GI-NEXT:    fcvtzs v21.2d, v21.2d
+; CHECK-GI-NEXT:    fcvtzs v22.2d, v22.2d
+; CHECK-GI-NEXT:    fcvtzs v23.2d, v23.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-GI-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-GI-NEXT:    uzp1 v4.4s, v16.4s, v17.4s
+; CHECK-GI-NEXT:    uzp1 v5.4s, v18.4s, v19.4s
+; CHECK-GI-NEXT:    uzp1 v6.4s, v20.4s, v21.4s
+; CHECK-GI-NEXT:    uzp1 v7.4s, v22.4s, v23.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    uzp1 v2.8h, v4.8h, v5.8h
+; CHECK-GI-NEXT:    uzp1 v3.8h, v6.8h, v7.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <32 x double> %a to <32 x i16>
   ret <32 x i16> %c
 }
 
 define <32 x i16> @fptou_v32f64_v32i16(<32 x double> %a) {
-; CHECK-LABEL: fptou_v32f64_v32i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    stp d15, d14, [sp, #-64]! // 16-byte Folded Spill
-; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
-; CHECK-NEXT:    .cfi_offset b8, -8
-; CHECK-NEXT:    .cfi_offset b9, -16
-; CHECK-NEXT:    .cfi_offset b10, -24
-; CHECK-NEXT:    .cfi_offset b11, -32
-; CHECK-NEXT:    .cfi_offset b12, -40
-; CHECK-NEXT:    .cfi_offset b13, -48
-; CHECK-NEXT:    .cfi_offset b14, -56
-; CHECK-NEXT:    .cfi_offset b15, -64
-; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-NEXT:    fcvtzs v18.2d, v2.2d
-; CHECK-NEXT:    adrp x8, .LCPI59_0
-; CHECK-NEXT:    fcvtzs v19.2d, v1.2d
-; CHECK-NEXT:    ldp q20, q21, [sp, #160]
-; CHECK-NEXT:    fcvtzs v22.2d, v0.2d
-; CHECK-NEXT:    ldp q23, q24, [sp, #96]
-; CHECK-NEXT:    fcvtzs v7.2d, v7.2d
-; CHECK-NEXT:    ldp q16, q17, [sp, #128]
-; CHECK-NEXT:    xtn v3.2s, v3.2d
-; CHECK-NEXT:    fcvtzs v21.2d, v21.2d
-; CHECK-NEXT:    fcvtzs v20.2d, v20.2d
-; CHECK-NEXT:    xtn v2.2s, v18.2d
-; CHECK-NEXT:    ldp q18, q25, [sp, #64]
-; CHECK-NEXT:    xtn v1.2s, v19.2d
-; CHECK-NEXT:    fcvtzs v19.2d, v24.2d
-; CHECK-NEXT:    fcvtzs v17.2d, v17.2d
-; CHECK-NEXT:    xtn v0.2s, v22.2d
-; CHECK-NEXT:    fcvtzs v22.2d, v23.2d
-; CHECK-NEXT:    xtn v29.2s, v7.2d
-; CHECK-NEXT:    fcvtzs v7.2d, v25.2d
-; CHECK-NEXT:    fcvtzs v6.2d, v6.2d
-; CHECK-NEXT:    fcvtzs v18.2d, v18.2d
-; CHECK-NEXT:    fcvtzs v16.2d, v16.2d
-; CHECK-NEXT:    fcvtzs v5.2d, v5.2d
-; CHECK-NEXT:    xtn v15.2s, v21.2d
-; CHECK-NEXT:    xtn v11.2s, v19.2d
-; CHECK-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-NEXT:    xtn v14.2s, v20.2d
-; CHECK-NEXT:    xtn v10.2s, v22.2d
-; CHECK-NEXT:    xtn v13.2s, v17.2d
-; CHECK-NEXT:    xtn v9.2s, v7.2d
-; CHECK-NEXT:    xtn v28.2s, v6.2d
-; CHECK-NEXT:    xtn v8.2s, v18.2d
-; CHECK-NEXT:    xtn v12.2s, v16.2d
-; CHECK-NEXT:    xtn v27.2s, v5.2d
-; CHECK-NEXT:    xtn v26.2s, v4.2d
-; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI59_0]
-; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
-; CHECK-NEXT:    tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b
-; CHECK-NEXT:    tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b
-; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b
-; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d15, d14, [sp], #64 // 16-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v32f64_v32i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    stp d15, d14, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT:    .cfi_offset b8, -8
+; CHECK-SD-NEXT:    .cfi_offset b9, -16
+; CHECK-SD-NEXT:    .cfi_offset b10, -24
+; CHECK-SD-NEXT:    .cfi_offset b11, -32
+; CHECK-SD-NEXT:    .cfi_offset b12, -40
+; CHECK-SD-NEXT:    .cfi_offset b13, -48
+; CHECK-SD-NEXT:    .cfi_offset b14, -56
+; CHECK-SD-NEXT:    .cfi_offset b15, -64
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v18.2d, v2.2d
+; CHECK-SD-NEXT:    adrp x8, .LCPI59_0
+; CHECK-SD-NEXT:    fcvtzs v19.2d, v1.2d
+; CHECK-SD-NEXT:    ldp q20, q21, [sp, #160]
+; CHECK-SD-NEXT:    fcvtzs v22.2d, v0.2d
+; CHECK-SD-NEXT:    ldp q23, q24, [sp, #96]
+; CHECK-SD-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-SD-NEXT:    ldp q16, q17, [sp, #128]
+; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v21.2d, v21.2d
+; CHECK-SD-NEXT:    fcvtzs v20.2d, v20.2d
+; CHECK-SD-NEXT:    xtn v2.2s, v18.2d
+; CHECK-SD-NEXT:    ldp q18, q25, [sp, #64]
+; CHECK-SD-NEXT:    xtn v1.2s, v19.2d
+; CHECK-SD-NEXT:    fcvtzs v19.2d, v24.2d
+; CHECK-SD-NEXT:    fcvtzs v17.2d, v17.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v22.2d
+; CHECK-SD-NEXT:    fcvtzs v22.2d, v23.2d
+; CHECK-SD-NEXT:    xtn v29.2s, v7.2d
+; CHECK-SD-NEXT:    fcvtzs v7.2d, v25.2d
+; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT:    fcvtzs v18.2d, v18.2d
+; CHECK-SD-NEXT:    fcvtzs v16.2d, v16.2d
+; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-SD-NEXT:    xtn v15.2s, v21.2d
+; CHECK-SD-NEXT:    xtn v11.2s, v19.2d
+; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-SD-NEXT:    xtn v14.2s, v20.2d
+; CHECK-SD-NEXT:    xtn v10.2s, v22.2d
+; CHECK-SD-NEXT:    xtn v13.2s, v17.2d
+; CHECK-SD-NEXT:    xtn v9.2s, v7.2d
+; CHECK-SD-NEXT:    xtn v28.2s, v6.2d
+; CHECK-SD-NEXT:    xtn v8.2s, v18.2d
+; CHECK-SD-NEXT:    xtn v12.2s, v16.2d
+; CHECK-SD-NEXT:    xtn v27.2s, v5.2d
+; CHECK-SD-NEXT:    xtn v26.2s, v4.2d
+; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI59_0]
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
+; CHECK-SD-NEXT:    tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b
+; CHECK-SD-NEXT:    tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b
+; CHECK-SD-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d15, d14, [sp], #64 // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v32f64_v32i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldp q16, q17, [sp]
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    ldp q18, q19, [sp, #32]
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    ldp q20, q21, [sp, #64]
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    ldp q22, q23, [sp, #96]
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzu v4.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzu v5.2d, v5.2d
+; CHECK-GI-NEXT:    fcvtzu v6.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzu v7.2d, v7.2d
+; CHECK-GI-NEXT:    fcvtzu v16.2d, v16.2d
+; CHECK-GI-NEXT:    fcvtzu v17.2d, v17.2d
+; CHECK-GI-NEXT:    fcvtzu v18.2d, v18.2d
+; CHECK-GI-NEXT:    fcvtzu v19.2d, v19.2d
+; CHECK-GI-NEXT:    fcvtzu v20.2d, v20.2d
+; CHECK-GI-NEXT:    fcvtzu v21.2d, v21.2d
+; CHECK-GI-NEXT:    fcvtzu v22.2d, v22.2d
+; CHECK-GI-NEXT:    fcvtzu v23.2d, v23.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-GI-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-GI-NEXT:    uzp1 v4.4s, v16.4s, v17.4s
+; CHECK-GI-NEXT:    uzp1 v5.4s, v18.4s, v19.4s
+; CHECK-GI-NEXT:    uzp1 v6.4s, v20.4s, v21.4s
+; CHECK-GI-NEXT:    uzp1 v7.4s, v22.4s, v23.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    uzp1 v2.8h, v4.8h, v5.8h
+; CHECK-GI-NEXT:    uzp1 v3.8h, v6.8h, v7.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <32 x double> %a to <32 x i16>
   ret <32 x i16> %c
@@ -1267,303 +1575,497 @@ entry:
 }
 
 define <2 x i8> @fptou_v2f64_v2i8(<2 x double> %a) {
-; CHECK-LABEL: fptou_v2f64_v2i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v2f64_v2i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v2f64_v2i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <2 x double> %a to <2 x i8>
   ret <2 x i8> %c
 }
 
 define <3 x i8> @fptos_v3f64_v3i8(<3 x double> %a) {
-; CHECK-LABEL: fptos_v3f64_v3i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-NEXT:    fcvtzs v1.2d, v2.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    xtn v1.2s, v1.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    umov w0, v0.h[0]
-; CHECK-NEXT:    umov w1, v0.h[1]
-; CHECK-NEXT:    umov w2, v0.h[2]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v3f64_v3i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    umov w0, v0.h[0]
+; CHECK-SD-NEXT:    umov w1, v0.h[1]
+; CHECK-SD-NEXT:    umov w2, v0.h[2]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v3f64_v3i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fmov x2, d1
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
+; CHECK-GI-NEXT:    mov d2, v0.d[1]
+; CHECK-GI-NEXT:    fmov x0, d0
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-GI-NEXT:    fmov x1, d2
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <3 x double> %a to <3 x i8>
   ret <3 x i8> %c
 }
 
 define <3 x i8> @fptou_v3f64_v3i8(<3 x double> %a) {
-; CHECK-LABEL: fptou_v3f64_v3i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-NEXT:    fcvtzs v1.2d, v2.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    xtn v1.2s, v1.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    umov w0, v0.h[0]
-; CHECK-NEXT:    umov w1, v0.h[1]
-; CHECK-NEXT:    umov w2, v0.h[2]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v3f64_v3i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    umov w0, v0.h[0]
+; CHECK-SD-NEXT:    umov w1, v0.h[1]
+; CHECK-SD-NEXT:    umov w2, v0.h[2]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v3f64_v3i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fmov x2, d1
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
+; CHECK-GI-NEXT:    mov d2, v0.d[1]
+; CHECK-GI-NEXT:    fmov x0, d0
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-GI-NEXT:    fmov x1, d2
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <3 x double> %a to <3 x i8>
   ret <3 x i8> %c
 }
 
 define <4 x i8> @fptos_v4f64_v4i8(<4 x double> %a) {
-; CHECK-LABEL: fptos_v4f64_v4i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    xtn v1.2s, v1.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v4f64_v4i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v4f64_v4i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <4 x double> %a to <4 x i8>
   ret <4 x i8> %c
 }
 
 define <4 x i8> @fptou_v4f64_v4i8(<4 x double> %a) {
-; CHECK-LABEL: fptou_v4f64_v4i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    xtn v1.2s, v1.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v4f64_v4i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v4f64_v4i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <4 x double> %a to <4 x i8>
   ret <4 x i8> %c
 }
 
 define <8 x i8> @fptos_v8f64_v8i8(<8 x double> %a) {
-; CHECK-LABEL: fptos_v8f64_v8i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    xtn v3.2s, v3.2d
-; CHECK-NEXT:    xtn v2.2s, v2.2d
-; CHECK-NEXT:    xtn v1.2s, v1.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    uzp1 v2.4h, v2.4h, v3.4h
-; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    uzp1 v0.8b, v0.8b, v2.8b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v8f64_v8i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    uzp1 v2.4h, v2.4h, v3.4h
+; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    uzp1 v0.8b, v0.8b, v2.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v8f64_v8i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    xtn v0.8b, v0.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <8 x double> %a to <8 x i8>
   ret <8 x i8> %c
 }
 
 define <8 x i8> @fptou_v8f64_v8i8(<8 x double> %a) {
-; CHECK-LABEL: fptou_v8f64_v8i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    xtn v3.2s, v3.2d
-; CHECK-NEXT:    xtn v2.2s, v2.2d
-; CHECK-NEXT:    xtn v1.2s, v1.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    uzp1 v2.4h, v2.4h, v3.4h
-; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    uzp1 v0.8b, v0.8b, v2.8b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v8f64_v8i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    uzp1 v2.4h, v2.4h, v3.4h
+; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    uzp1 v0.8b, v0.8b, v2.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v8f64_v8i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    xtn v0.8b, v0.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <8 x double> %a to <8 x i8>
   ret <8 x i8> %c
 }
 
 define <16 x i8> @fptos_v16f64_v16i8(<16 x double> %a) {
-; CHECK-LABEL: fptos_v16f64_v16i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v7.2d, v7.2d
-; CHECK-NEXT:    fcvtzs v6.2d, v6.2d
-; CHECK-NEXT:    fcvtzs v5.2d, v5.2d
-; CHECK-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    xtn v7.2s, v7.2d
-; CHECK-NEXT:    xtn v6.2s, v6.2d
-; CHECK-NEXT:    xtn v5.2s, v5.2d
-; CHECK-NEXT:    xtn v4.2s, v4.2d
-; CHECK-NEXT:    xtn v3.2s, v3.2d
-; CHECK-NEXT:    xtn v2.2s, v2.2d
-; CHECK-NEXT:    xtn v1.2s, v1.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    uzp1 v6.4h, v6.4h, v7.4h
-; CHECK-NEXT:    uzp1 v4.4h, v4.4h, v5.4h
-; CHECK-NEXT:    uzp1 v2.4h, v2.4h, v3.4h
-; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    mov v4.d[1], v6.d[0]
-; CHECK-NEXT:    mov v0.d[1], v2.d[0]
-; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v4.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v16f64_v16i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    xtn v7.2s, v7.2d
+; CHECK-SD-NEXT:    xtn v6.2s, v6.2d
+; CHECK-SD-NEXT:    xtn v5.2s, v5.2d
+; CHECK-SD-NEXT:    xtn v4.2s, v4.2d
+; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    uzp1 v6.4h, v6.4h, v7.4h
+; CHECK-SD-NEXT:    uzp1 v4.4h, v4.4h, v5.4h
+; CHECK-SD-NEXT:    uzp1 v2.4h, v2.4h, v3.4h
+; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    mov v4.d[1], v6.d[0]
+; CHECK-SD-NEXT:    mov v0.d[1], v2.d[0]
+; CHECK-SD-NEXT:    uzp1 v0.16b, v0.16b, v4.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v16f64_v16i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-GI-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-GI-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <16 x double> %a to <16 x i8>
   ret <16 x i8> %c
 }
 
 define <16 x i8> @fptou_v16f64_v16i8(<16 x double> %a) {
-; CHECK-LABEL: fptou_v16f64_v16i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v7.2d, v7.2d
-; CHECK-NEXT:    fcvtzs v6.2d, v6.2d
-; CHECK-NEXT:    fcvtzs v5.2d, v5.2d
-; CHECK-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    xtn v7.2s, v7.2d
-; CHECK-NEXT:    xtn v6.2s, v6.2d
-; CHECK-NEXT:    xtn v5.2s, v5.2d
-; CHECK-NEXT:    xtn v4.2s, v4.2d
-; CHECK-NEXT:    xtn v3.2s, v3.2d
-; CHECK-NEXT:    xtn v2.2s, v2.2d
-; CHECK-NEXT:    xtn v1.2s, v1.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    uzp1 v6.4h, v6.4h, v7.4h
-; CHECK-NEXT:    uzp1 v4.4h, v4.4h, v5.4h
-; CHECK-NEXT:    uzp1 v2.4h, v2.4h, v3.4h
-; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    mov v4.d[1], v6.d[0]
-; CHECK-NEXT:    mov v0.d[1], v2.d[0]
-; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v4.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v16f64_v16i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    xtn v7.2s, v7.2d
+; CHECK-SD-NEXT:    xtn v6.2s, v6.2d
+; CHECK-SD-NEXT:    xtn v5.2s, v5.2d
+; CHECK-SD-NEXT:    xtn v4.2s, v4.2d
+; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    uzp1 v6.4h, v6.4h, v7.4h
+; CHECK-SD-NEXT:    uzp1 v4.4h, v4.4h, v5.4h
+; CHECK-SD-NEXT:    uzp1 v2.4h, v2.4h, v3.4h
+; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    mov v4.d[1], v6.d[0]
+; CHECK-SD-NEXT:    mov v0.d[1], v2.d[0]
+; CHECK-SD-NEXT:    uzp1 v0.16b, v0.16b, v4.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v16f64_v16i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzu v4.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzu v5.2d, v5.2d
+; CHECK-GI-NEXT:    fcvtzu v6.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzu v7.2d, v7.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-GI-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <16 x double> %a to <16 x i8>
   ret <16 x i8> %c
 }
 
 define <32 x i8> @fptos_v32f64_v32i8(<32 x double> %a) {
-; CHECK-LABEL: fptos_v32f64_v32i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldp q16, q17, [sp]
-; CHECK-NEXT:    fcvtzs v7.2d, v7.2d
-; CHECK-NEXT:    ldp q18, q19, [sp, #32]
-; CHECK-NEXT:    fcvtzs v6.2d, v6.2d
-; CHECK-NEXT:    ldp q20, q21, [sp, #64]
-; CHECK-NEXT:    fcvtzs v5.2d, v5.2d
-; CHECK-NEXT:    ldp q22, q23, [sp, #96]
-; CHECK-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    fcvtzs v21.2d, v21.2d
-; CHECK-NEXT:    fcvtzs v20.2d, v20.2d
-; CHECK-NEXT:    fcvtzs v23.2d, v23.2d
-; CHECK-NEXT:    fcvtzs v22.2d, v22.2d
-; CHECK-NEXT:    fcvtzs v19.2d, v19.2d
-; CHECK-NEXT:    fcvtzs v18.2d, v18.2d
-; CHECK-NEXT:    fcvtzs v17.2d, v17.2d
-; CHECK-NEXT:    fcvtzs v16.2d, v16.2d
-; CHECK-NEXT:    xtn v7.2s, v7.2d
-; CHECK-NEXT:    xtn v6.2s, v6.2d
-; CHECK-NEXT:    xtn v5.2s, v5.2d
-; CHECK-NEXT:    xtn v4.2s, v4.2d
-; CHECK-NEXT:    xtn v3.2s, v3.2d
-; CHECK-NEXT:    xtn v2.2s, v2.2d
-; CHECK-NEXT:    xtn v1.2s, v1.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    xtn v23.2s, v23.2d
-; CHECK-NEXT:    xtn v22.2s, v22.2d
-; CHECK-NEXT:    xtn v21.2s, v21.2d
-; CHECK-NEXT:    xtn v20.2s, v20.2d
-; CHECK-NEXT:    xtn v19.2s, v19.2d
-; CHECK-NEXT:    xtn v18.2s, v18.2d
-; CHECK-NEXT:    xtn v17.2s, v17.2d
-; CHECK-NEXT:    xtn v16.2s, v16.2d
-; CHECK-NEXT:    uzp1 v6.4h, v6.4h, v7.4h
-; CHECK-NEXT:    uzp1 v4.4h, v4.4h, v5.4h
-; CHECK-NEXT:    uzp1 v2.4h, v2.4h, v3.4h
-; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    uzp1 v1.4h, v22.4h, v23.4h
-; CHECK-NEXT:    uzp1 v3.4h, v20.4h, v21.4h
-; CHECK-NEXT:    uzp1 v5.4h, v18.4h, v19.4h
-; CHECK-NEXT:    uzp1 v7.4h, v16.4h, v17.4h
-; CHECK-NEXT:    mov v4.d[1], v6.d[0]
-; CHECK-NEXT:    mov v0.d[1], v2.d[0]
-; CHECK-NEXT:    mov v3.d[1], v1.d[0]
-; CHECK-NEXT:    mov v7.d[1], v5.d[0]
-; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v4.16b
-; CHECK-NEXT:    uzp1 v1.16b, v7.16b, v3.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v32f64_v32i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ldp q16, q17, [sp]
+; CHECK-SD-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-SD-NEXT:    ldp q18, q19, [sp, #32]
+; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT:    ldp q20, q21, [sp, #64]
+; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-SD-NEXT:    ldp q22, q23, [sp, #96]
+; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    fcvtzs v21.2d, v21.2d
+; CHECK-SD-NEXT:    fcvtzs v20.2d, v20.2d
+; CHECK-SD-NEXT:    fcvtzs v23.2d, v23.2d
+; CHECK-SD-NEXT:    fcvtzs v22.2d, v22.2d
+; CHECK-SD-NEXT:    fcvtzs v19.2d, v19.2d
+; CHECK-SD-NEXT:    fcvtzs v18.2d, v18.2d
+; CHECK-SD-NEXT:    fcvtzs v17.2d, v17.2d
+; CHECK-SD-NEXT:    fcvtzs v16.2d, v16.2d
+; CHECK-SD-NEXT:    xtn v7.2s, v7.2d
+; CHECK-SD-NEXT:    xtn v6.2s, v6.2d
+; CHECK-SD-NEXT:    xtn v5.2s, v5.2d
+; CHECK-SD-NEXT:    xtn v4.2s, v4.2d
+; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    xtn v23.2s, v23.2d
+; CHECK-SD-NEXT:    xtn v22.2s, v22.2d
+; CHECK-SD-NEXT:    xtn v21.2s, v21.2d
+; CHECK-SD-NEXT:    xtn v20.2s, v20.2d
+; CHECK-SD-NEXT:    xtn v19.2s, v19.2d
+; CHECK-SD-NEXT:    xtn v18.2s, v18.2d
+; CHECK-SD-NEXT:    xtn v17.2s, v17.2d
+; CHECK-SD-NEXT:    xtn v16.2s, v16.2d
+; CHECK-SD-NEXT:    uzp1 v6.4h, v6.4h, v7.4h
+; CHECK-SD-NEXT:    uzp1 v4.4h, v4.4h, v5.4h
+; CHECK-SD-NEXT:    uzp1 v2.4h, v2.4h, v3.4h
+; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    uzp1 v1.4h, v22.4h, v23.4h
+; CHECK-SD-NEXT:    uzp1 v3.4h, v20.4h, v21.4h
+; CHECK-SD-NEXT:    uzp1 v5.4h, v18.4h, v19.4h
+; CHECK-SD-NEXT:    uzp1 v7.4h, v16.4h, v17.4h
+; CHECK-SD-NEXT:    mov v4.d[1], v6.d[0]
+; CHECK-SD-NEXT:    mov v0.d[1], v2.d[0]
+; CHECK-SD-NEXT:    mov v3.d[1], v1.d[0]
+; CHECK-SD-NEXT:    mov v7.d[1], v5.d[0]
+; CHECK-SD-NEXT:    uzp1 v0.16b, v0.16b, v4.16b
+; CHECK-SD-NEXT:    uzp1 v1.16b, v7.16b, v3.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v32f64_v32i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldp q16, q17, [sp]
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    ldp q18, q19, [sp, #32]
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    ldp q20, q21, [sp, #64]
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    ldp q22, q23, [sp, #96]
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-GI-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-GI-NEXT:    fcvtzs v16.2d, v16.2d
+; CHECK-GI-NEXT:    fcvtzs v17.2d, v17.2d
+; CHECK-GI-NEXT:    fcvtzs v18.2d, v18.2d
+; CHECK-GI-NEXT:    fcvtzs v19.2d, v19.2d
+; CHECK-GI-NEXT:    fcvtzs v20.2d, v20.2d
+; CHECK-GI-NEXT:    fcvtzs v21.2d, v21.2d
+; CHECK-GI-NEXT:    fcvtzs v22.2d, v22.2d
+; CHECK-GI-NEXT:    fcvtzs v23.2d, v23.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-GI-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-GI-NEXT:    uzp1 v4.4s, v16.4s, v17.4s
+; CHECK-GI-NEXT:    uzp1 v5.4s, v18.4s, v19.4s
+; CHECK-GI-NEXT:    uzp1 v6.4s, v20.4s, v21.4s
+; CHECK-GI-NEXT:    uzp1 v7.4s, v22.4s, v23.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    uzp1 v2.8h, v4.8h, v5.8h
+; CHECK-GI-NEXT:    uzp1 v3.8h, v6.8h, v7.8h
+; CHECK-GI-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    uzp1 v1.16b, v2.16b, v3.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <32 x double> %a to <32 x i8>
   ret <32 x i8> %c
 }
 
 define <32 x i8> @fptou_v32f64_v32i8(<32 x double> %a) {
-; CHECK-LABEL: fptou_v32f64_v32i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldp q16, q17, [sp]
-; CHECK-NEXT:    fcvtzs v7.2d, v7.2d
-; CHECK-NEXT:    ldp q18, q19, [sp, #32]
-; CHECK-NEXT:    fcvtzs v6.2d, v6.2d
-; CHECK-NEXT:    ldp q20, q21, [sp, #64]
-; CHECK-NEXT:    fcvtzs v5.2d, v5.2d
-; CHECK-NEXT:    ldp q22, q23, [sp, #96]
-; CHECK-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    fcvtzs v21.2d, v21.2d
-; CHECK-NEXT:    fcvtzs v20.2d, v20.2d
-; CHECK-NEXT:    fcvtzs v23.2d, v23.2d
-; CHECK-NEXT:    fcvtzs v22.2d, v22.2d
-; CHECK-NEXT:    fcvtzs v19.2d, v19.2d
-; CHECK-NEXT:    fcvtzs v18.2d, v18.2d
-; CHECK-NEXT:    fcvtzs v17.2d, v17.2d
-; CHECK-NEXT:    fcvtzs v16.2d, v16.2d
-; CHECK-NEXT:    xtn v7.2s, v7.2d
-; CHECK-NEXT:    xtn v6.2s, v6.2d
-; CHECK-NEXT:    xtn v5.2s, v5.2d
-; CHECK-NEXT:    xtn v4.2s, v4.2d
-; CHECK-NEXT:    xtn v3.2s, v3.2d
-; CHECK-NEXT:    xtn v2.2s, v2.2d
-; CHECK-NEXT:    xtn v1.2s, v1.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    xtn v23.2s, v23.2d
-; CHECK-NEXT:    xtn v22.2s, v22.2d
-; CHECK-NEXT:    xtn v21.2s, v21.2d
-; CHECK-NEXT:    xtn v20.2s, v20.2d
-; CHECK-NEXT:    xtn v19.2s, v19.2d
-; CHECK-NEXT:    xtn v18.2s, v18.2d
-; CHECK-NEXT:    xtn v17.2s, v17.2d
-; CHECK-NEXT:    xtn v16.2s, v16.2d
-; CHECK-NEXT:    uzp1 v6.4h, v6.4h, v7.4h
-; CHECK-NEXT:    uzp1 v4.4h, v4.4h, v5.4h
-; CHECK-NEXT:    uzp1 v2.4h, v2.4h, v3.4h
-; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    uzp1 v1.4h, v22.4h, v23.4h
-; CHECK-NEXT:    uzp1 v3.4h, v20.4h, v21.4h
-; CHECK-NEXT:    uzp1 v5.4h, v18.4h, v19.4h
-; CHECK-NEXT:    uzp1 v7.4h, v16.4h, v17.4h
-; CHECK-NEXT:    mov v4.d[1], v6.d[0]
-; CHECK-NEXT:    mov v0.d[1], v2.d[0]
-; CHECK-NEXT:    mov v3.d[1], v1.d[0]
-; CHECK-NEXT:    mov v7.d[1], v5.d[0]
-; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v4.16b
-; CHECK-NEXT:    uzp1 v1.16b, v7.16b, v3.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v32f64_v32i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ldp q16, q17, [sp]
+; CHECK-SD-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-SD-NEXT:    ldp q18, q19, [sp, #32]
+; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT:    ldp q20, q21, [sp, #64]
+; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-SD-NEXT:    ldp q22, q23, [sp, #96]
+; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    fcvtzs v21.2d, v21.2d
+; CHECK-SD-NEXT:    fcvtzs v20.2d, v20.2d
+; CHECK-SD-NEXT:    fcvtzs v23.2d, v23.2d
+; CHECK-SD-NEXT:    fcvtzs v22.2d, v22.2d
+; CHECK-SD-NEXT:    fcvtzs v19.2d, v19.2d
+; CHECK-SD-NEXT:    fcvtzs v18.2d, v18.2d
+; CHECK-SD-NEXT:    fcvtzs v17.2d, v17.2d
+; CHECK-SD-NEXT:    fcvtzs v16.2d, v16.2d
+; CHECK-SD-NEXT:    xtn v7.2s, v7.2d
+; CHECK-SD-NEXT:    xtn v6.2s, v6.2d
+; CHECK-SD-NEXT:    xtn v5.2s, v5.2d
+; CHECK-SD-NEXT:    xtn v4.2s, v4.2d
+; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    xtn v23.2s, v23.2d
+; CHECK-SD-NEXT:    xtn v22.2s, v22.2d
+; CHECK-SD-NEXT:    xtn v21.2s, v21.2d
+; CHECK-SD-NEXT:    xtn v20.2s, v20.2d
+; CHECK-SD-NEXT:    xtn v19.2s, v19.2d
+; CHECK-SD-NEXT:    xtn v18.2s, v18.2d
+; CHECK-SD-NEXT:    xtn v17.2s, v17.2d
+; CHECK-SD-NEXT:    xtn v16.2s, v16.2d
+; CHECK-SD-NEXT:    uzp1 v6.4h, v6.4h, v7.4h
+; CHECK-SD-NEXT:    uzp1 v4.4h, v4.4h, v5.4h
+; CHECK-SD-NEXT:    uzp1 v2.4h, v2.4h, v3.4h
+; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    uzp1 v1.4h, v22.4h, v23.4h
+; CHECK-SD-NEXT:    uzp1 v3.4h, v20.4h, v21.4h
+; CHECK-SD-NEXT:    uzp1 v5.4h, v18.4h, v19.4h
+; CHECK-SD-NEXT:    uzp1 v7.4h, v16.4h, v17.4h
+; CHECK-SD-NEXT:    mov v4.d[1], v6.d[0]
+; CHECK-SD-NEXT:    mov v0.d[1], v2.d[0]
+; CHECK-SD-NEXT:    mov v3.d[1], v1.d[0]
+; CHECK-SD-NEXT:    mov v7.d[1], v5.d[0]
+; CHECK-SD-NEXT:    uzp1 v0.16b, v0.16b, v4.16b
+; CHECK-SD-NEXT:    uzp1 v1.16b, v7.16b, v3.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v32f64_v32i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldp q16, q17, [sp]
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    ldp q18, q19, [sp, #32]
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    ldp q20, q21, [sp, #64]
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    ldp q22, q23, [sp, #96]
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzu v4.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzu v5.2d, v5.2d
+; CHECK-GI-NEXT:    fcvtzu v6.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzu v7.2d, v7.2d
+; CHECK-GI-NEXT:    fcvtzu v16.2d, v16.2d
+; CHECK-GI-NEXT:    fcvtzu v17.2d, v17.2d
+; CHECK-GI-NEXT:    fcvtzu v18.2d, v18.2d
+; CHECK-GI-NEXT:    fcvtzu v19.2d, v19.2d
+; CHECK-GI-NEXT:    fcvtzu v20.2d, v20.2d
+; CHECK-GI-NEXT:    fcvtzu v21.2d, v21.2d
+; CHECK-GI-NEXT:    fcvtzu v22.2d, v22.2d
+; CHECK-GI-NEXT:    fcvtzu v23.2d, v23.2d
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v2.4s, v4.4s, v5.4s
+; CHECK-GI-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-GI-NEXT:    uzp1 v4.4s, v16.4s, v17.4s
+; CHECK-GI-NEXT:    uzp1 v5.4s, v18.4s, v19.4s
+; CHECK-GI-NEXT:    uzp1 v6.4s, v20.4s, v21.4s
+; CHECK-GI-NEXT:    uzp1 v7.4s, v22.4s, v23.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    uzp1 v2.8h, v4.8h, v5.8h
+; CHECK-GI-NEXT:    uzp1 v3.8h, v6.8h, v7.8h
+; CHECK-GI-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    uzp1 v1.16b, v2.16b, v3.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <32 x double> %a to <32 x i8>
   ret <32 x i8> %c
@@ -1592,242 +2094,434 @@ entry:
 }
 
 define <3 x i64> @fptos_v3f32_v3i64(<3 x float> %a) {
-; CHECK-LABEL: fptos_v3f32_v3i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl v1.2d, v0.2s
-; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
-; CHECK-NEXT:    fcvtzs v3.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v2.2d, v0.2d
-; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q2
-; CHECK-NEXT:    fmov d0, d3
-; CHECK-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v3f32_v3i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl v1.2d, v0.2s
+; CHECK-SD-NEXT:    fcvtl2 v0.2d, v0.4s
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v0.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT:    fmov d0, d3
+; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v3f32_v3i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov s1, v0.s[2]
+; CHECK-GI-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-GI-NEXT:    fcvtl v1.2d, v1.2s
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v1.2d
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <3 x float> %a to <3 x i64>
   ret <3 x i64> %c
 }
 
 define <3 x i64> @fptou_v3f32_v3i64(<3 x float> %a) {
-; CHECK-LABEL: fptou_v3f32_v3i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl v1.2d, v0.2s
-; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
-; CHECK-NEXT:    fcvtzu v3.2d, v1.2d
-; CHECK-NEXT:    fcvtzu v2.2d, v0.2d
-; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q2
-; CHECK-NEXT:    fmov d0, d3
-; CHECK-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v3f32_v3i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl v1.2d, v0.2s
+; CHECK-SD-NEXT:    fcvtl2 v0.2d, v0.4s
+; CHECK-SD-NEXT:    fcvtzu v3.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzu v2.2d, v0.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT:    fmov d0, d3
+; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v3f32_v3i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov s1, v0.s[2]
+; CHECK-GI-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-GI-NEXT:    fcvtl v1.2d, v1.2s
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v1.2d
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <3 x float> %a to <3 x i64>
   ret <3 x i64> %c
 }
 
 define <4 x i64> @fptos_v4f32_v4i64(<4 x float> %a) {
-; CHECK-LABEL: fptos_v4f32_v4i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl2 v1.2d, v0.4s
-; CHECK-NEXT:    fcvtl v0.2d, v0.2s
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v4f32_v4i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl2 v1.2d, v0.4s
+; CHECK-SD-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v4f32_v4i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtl v1.2d, v0.2s
+; CHECK-GI-NEXT:    fcvtl2 v2.2d, v0.4s
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v2.2d
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <4 x float> %a to <4 x i64>
   ret <4 x i64> %c
 }
 
 define <4 x i64> @fptou_v4f32_v4i64(<4 x float> %a) {
-; CHECK-LABEL: fptou_v4f32_v4i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl2 v1.2d, v0.4s
-; CHECK-NEXT:    fcvtl v0.2d, v0.2s
-; CHECK-NEXT:    fcvtzu v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v4f32_v4i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl2 v1.2d, v0.4s
+; CHECK-SD-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-SD-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v4f32_v4i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtl v1.2d, v0.2s
+; CHECK-GI-NEXT:    fcvtl2 v2.2d, v0.4s
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v2.2d
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <4 x float> %a to <4 x i64>
   ret <4 x i64> %c
 }
 
 define <8 x i64> @fptos_v8f32_v8i64(<8 x float> %a) {
-; CHECK-LABEL: fptos_v8f32_v8i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl v2.2d, v0.2s
-; CHECK-NEXT:    fcvtl2 v3.2d, v0.4s
-; CHECK-NEXT:    fcvtl2 v4.2d, v1.4s
-; CHECK-NEXT:    fcvtl v5.2d, v1.2s
-; CHECK-NEXT:    fcvtzs v0.2d, v2.2d
-; CHECK-NEXT:    fcvtzs v1.2d, v3.2d
-; CHECK-NEXT:    fcvtzs v3.2d, v4.2d
-; CHECK-NEXT:    fcvtzs v2.2d, v5.2d
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v8f32_v8i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl v2.2d, v0.2s
+; CHECK-SD-NEXT:    fcvtl2 v3.2d, v0.4s
+; CHECK-SD-NEXT:    fcvtl2 v4.2d, v1.4s
+; CHECK-SD-NEXT:    fcvtl v5.2d, v1.2s
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v4.2d
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v5.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v8f32_v8i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtl v2.2d, v0.2s
+; CHECK-GI-NEXT:    fcvtl2 v3.2d, v0.4s
+; CHECK-GI-NEXT:    fcvtl v4.2d, v1.2s
+; CHECK-GI-NEXT:    fcvtl2 v5.2d, v1.4s
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v5.2d
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <8 x float> %a to <8 x i64>
   ret <8 x i64> %c
 }
 
 define <8 x i64> @fptou_v8f32_v8i64(<8 x float> %a) {
-; CHECK-LABEL: fptou_v8f32_v8i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl v2.2d, v0.2s
-; CHECK-NEXT:    fcvtl2 v3.2d, v0.4s
-; CHECK-NEXT:    fcvtl2 v4.2d, v1.4s
-; CHECK-NEXT:    fcvtl v5.2d, v1.2s
-; CHECK-NEXT:    fcvtzu v0.2d, v2.2d
-; CHECK-NEXT:    fcvtzu v1.2d, v3.2d
-; CHECK-NEXT:    fcvtzu v3.2d, v4.2d
-; CHECK-NEXT:    fcvtzu v2.2d, v5.2d
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v8f32_v8i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl v2.2d, v0.2s
+; CHECK-SD-NEXT:    fcvtl2 v3.2d, v0.4s
+; CHECK-SD-NEXT:    fcvtl2 v4.2d, v1.4s
+; CHECK-SD-NEXT:    fcvtl v5.2d, v1.2s
+; CHECK-SD-NEXT:    fcvtzu v0.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzu v1.2d, v3.2d
+; CHECK-SD-NEXT:    fcvtzu v3.2d, v4.2d
+; CHECK-SD-NEXT:    fcvtzu v2.2d, v5.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v8f32_v8i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtl v2.2d, v0.2s
+; CHECK-GI-NEXT:    fcvtl2 v3.2d, v0.4s
+; CHECK-GI-NEXT:    fcvtl v4.2d, v1.2s
+; CHECK-GI-NEXT:    fcvtl2 v5.2d, v1.4s
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v5.2d
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <8 x float> %a to <8 x i64>
   ret <8 x i64> %c
-}
-
-define <16 x i64> @fptos_v16f32_v16i64(<16 x float> %a) {
-; CHECK-LABEL: fptos_v16f32_v16i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl2 v4.2d, v0.4s
-; CHECK-NEXT:    fcvtl v0.2d, v0.2s
-; CHECK-NEXT:    fcvtl2 v5.2d, v1.4s
-; CHECK-NEXT:    fcvtl v6.2d, v1.2s
-; CHECK-NEXT:    fcvtl v7.2d, v2.2s
-; CHECK-NEXT:    fcvtl2 v16.2d, v2.4s
-; CHECK-NEXT:    fcvtl2 v17.2d, v3.4s
-; CHECK-NEXT:    fcvtl v18.2d, v3.2s
-; CHECK-NEXT:    fcvtzs v1.2d, v4.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    fcvtzs v3.2d, v5.2d
-; CHECK-NEXT:    fcvtzs v2.2d, v6.2d
-; CHECK-NEXT:    fcvtzs v4.2d, v7.2d
-; CHECK-NEXT:    fcvtzs v5.2d, v16.2d
-; CHECK-NEXT:    fcvtzs v7.2d, v17.2d
-; CHECK-NEXT:    fcvtzs v6.2d, v18.2d
-; CHECK-NEXT:    ret
+}
+
+define <16 x i64> @fptos_v16f32_v16i64(<16 x float> %a) {
+; CHECK-SD-LABEL: fptos_v16f32_v16i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl2 v4.2d, v0.4s
+; CHECK-SD-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-SD-NEXT:    fcvtl2 v5.2d, v1.4s
+; CHECK-SD-NEXT:    fcvtl v6.2d, v1.2s
+; CHECK-SD-NEXT:    fcvtl v7.2d, v2.2s
+; CHECK-SD-NEXT:    fcvtl2 v16.2d, v2.4s
+; CHECK-SD-NEXT:    fcvtl2 v17.2d, v3.4s
+; CHECK-SD-NEXT:    fcvtl v18.2d, v3.2s
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v4.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v5.2d
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v6.2d
+; CHECK-SD-NEXT:    fcvtzs v4.2d, v7.2d
+; CHECK-SD-NEXT:    fcvtzs v5.2d, v16.2d
+; CHECK-SD-NEXT:    fcvtzs v7.2d, v17.2d
+; CHECK-SD-NEXT:    fcvtzs v6.2d, v18.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v16f32_v16i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtl v4.2d, v0.2s
+; CHECK-GI-NEXT:    fcvtl2 v5.2d, v0.4s
+; CHECK-GI-NEXT:    fcvtl v6.2d, v1.2s
+; CHECK-GI-NEXT:    fcvtl2 v7.2d, v1.4s
+; CHECK-GI-NEXT:    fcvtl v16.2d, v2.2s
+; CHECK-GI-NEXT:    fcvtl2 v17.2d, v2.4s
+; CHECK-GI-NEXT:    fcvtl v18.2d, v3.2s
+; CHECK-GI-NEXT:    fcvtl2 v19.2d, v3.4s
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v5.2d
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v7.2d
+; CHECK-GI-NEXT:    fcvtzs v4.2d, v16.2d
+; CHECK-GI-NEXT:    fcvtzs v5.2d, v17.2d
+; CHECK-GI-NEXT:    fcvtzs v6.2d, v18.2d
+; CHECK-GI-NEXT:    fcvtzs v7.2d, v19.2d
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <16 x float> %a to <16 x i64>
   ret <16 x i64> %c
 }
 
 define <16 x i64> @fptou_v16f32_v16i64(<16 x float> %a) {
-; CHECK-LABEL: fptou_v16f32_v16i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl2 v4.2d, v0.4s
-; CHECK-NEXT:    fcvtl v0.2d, v0.2s
-; CHECK-NEXT:    fcvtl2 v5.2d, v1.4s
-; CHECK-NEXT:    fcvtl v6.2d, v1.2s
-; CHECK-NEXT:    fcvtl v7.2d, v2.2s
-; CHECK-NEXT:    fcvtl2 v16.2d, v2.4s
-; CHECK-NEXT:    fcvtl2 v17.2d, v3.4s
-; CHECK-NEXT:    fcvtl v18.2d, v3.2s
-; CHECK-NEXT:    fcvtzu v1.2d, v4.2d
-; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
-; CHECK-NEXT:    fcvtzu v3.2d, v5.2d
-; CHECK-NEXT:    fcvtzu v2.2d, v6.2d
-; CHECK-NEXT:    fcvtzu v4.2d, v7.2d
-; CHECK-NEXT:    fcvtzu v5.2d, v16.2d
-; CHECK-NEXT:    fcvtzu v7.2d, v17.2d
-; CHECK-NEXT:    fcvtzu v6.2d, v18.2d
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v16f32_v16i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl2 v4.2d, v0.4s
+; CHECK-SD-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-SD-NEXT:    fcvtl2 v5.2d, v1.4s
+; CHECK-SD-NEXT:    fcvtl v6.2d, v1.2s
+; CHECK-SD-NEXT:    fcvtl v7.2d, v2.2s
+; CHECK-SD-NEXT:    fcvtl2 v16.2d, v2.4s
+; CHECK-SD-NEXT:    fcvtl2 v17.2d, v3.4s
+; CHECK-SD-NEXT:    fcvtl v18.2d, v3.2s
+; CHECK-SD-NEXT:    fcvtzu v1.2d, v4.2d
+; CHECK-SD-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-SD-NEXT:    fcvtzu v3.2d, v5.2d
+; CHECK-SD-NEXT:    fcvtzu v2.2d, v6.2d
+; CHECK-SD-NEXT:    fcvtzu v4.2d, v7.2d
+; CHECK-SD-NEXT:    fcvtzu v5.2d, v16.2d
+; CHECK-SD-NEXT:    fcvtzu v7.2d, v17.2d
+; CHECK-SD-NEXT:    fcvtzu v6.2d, v18.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v16f32_v16i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtl v4.2d, v0.2s
+; CHECK-GI-NEXT:    fcvtl2 v5.2d, v0.4s
+; CHECK-GI-NEXT:    fcvtl v6.2d, v1.2s
+; CHECK-GI-NEXT:    fcvtl2 v7.2d, v1.4s
+; CHECK-GI-NEXT:    fcvtl v16.2d, v2.2s
+; CHECK-GI-NEXT:    fcvtl2 v17.2d, v2.4s
+; CHECK-GI-NEXT:    fcvtl v18.2d, v3.2s
+; CHECK-GI-NEXT:    fcvtl2 v19.2d, v3.4s
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v4.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v5.2d
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v7.2d
+; CHECK-GI-NEXT:    fcvtzu v4.2d, v16.2d
+; CHECK-GI-NEXT:    fcvtzu v5.2d, v17.2d
+; CHECK-GI-NEXT:    fcvtzu v6.2d, v18.2d
+; CHECK-GI-NEXT:    fcvtzu v7.2d, v19.2d
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <16 x float> %a to <16 x i64>
   ret <16 x i64> %c
 }
 
 define <32 x i64> @fptos_v32f32_v32i64(<32 x float> %a) {
-; CHECK-LABEL: fptos_v32f32_v32i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl2 v16.2d, v7.4s
-; CHECK-NEXT:    fcvtl v7.2d, v7.2s
-; CHECK-NEXT:    fcvtl2 v17.2d, v6.4s
-; CHECK-NEXT:    fcvtl v6.2d, v6.2s
-; CHECK-NEXT:    fcvtl2 v18.2d, v5.4s
-; CHECK-NEXT:    fcvtl v5.2d, v5.2s
-; CHECK-NEXT:    fcvtl2 v19.2d, v4.4s
-; CHECK-NEXT:    fcvtl v4.2d, v4.2s
-; CHECK-NEXT:    fcvtl2 v20.2d, v3.4s
-; CHECK-NEXT:    fcvtl v3.2d, v3.2s
-; CHECK-NEXT:    fcvtzs v16.2d, v16.2d
-; CHECK-NEXT:    fcvtzs v7.2d, v7.2d
-; CHECK-NEXT:    fcvtzs v17.2d, v17.2d
-; CHECK-NEXT:    fcvtzs v6.2d, v6.2d
-; CHECK-NEXT:    fcvtzs v18.2d, v18.2d
-; CHECK-NEXT:    fcvtzs v5.2d, v5.2d
-; CHECK-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-NEXT:    stp q7, q16, [x8, #224]
-; CHECK-NEXT:    fcvtl2 v7.2d, v2.4s
-; CHECK-NEXT:    fcvtzs v16.2d, v19.2d
-; CHECK-NEXT:    stp q5, q18, [x8, #160]
-; CHECK-NEXT:    fcvtl v2.2d, v2.2s
-; CHECK-NEXT:    fcvtl2 v5.2d, v0.4s
-; CHECK-NEXT:    stp q6, q17, [x8, #192]
-; CHECK-NEXT:    fcvtl2 v6.2d, v1.4s
-; CHECK-NEXT:    fcvtzs v17.2d, v20.2d
-; CHECK-NEXT:    fcvtl v1.2d, v1.2s
-; CHECK-NEXT:    fcvtl v0.2d, v0.2s
-; CHECK-NEXT:    stp q4, q16, [x8, #128]
-; CHECK-NEXT:    fcvtzs v7.2d, v7.2d
-; CHECK-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-NEXT:    fcvtzs v4.2d, v6.2d
-; CHECK-NEXT:    stp q3, q17, [x8, #96]
-; CHECK-NEXT:    fcvtzs v3.2d, v5.2d
-; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    stp q2, q7, [x8, #64]
-; CHECK-NEXT:    stp q0, q3, [x8]
-; CHECK-NEXT:    stp q1, q4, [x8, #32]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v32f32_v32i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl2 v16.2d, v7.4s
+; CHECK-SD-NEXT:    fcvtl v7.2d, v7.2s
+; CHECK-SD-NEXT:    fcvtl2 v17.2d, v6.4s
+; CHECK-SD-NEXT:    fcvtl v6.2d, v6.2s
+; CHECK-SD-NEXT:    fcvtl2 v18.2d, v5.4s
+; CHECK-SD-NEXT:    fcvtl v5.2d, v5.2s
+; CHECK-SD-NEXT:    fcvtl2 v19.2d, v4.4s
+; CHECK-SD-NEXT:    fcvtl v4.2d, v4.2s
+; CHECK-SD-NEXT:    fcvtl2 v20.2d, v3.4s
+; CHECK-SD-NEXT:    fcvtl v3.2d, v3.2s
+; CHECK-SD-NEXT:    fcvtzs v16.2d, v16.2d
+; CHECK-SD-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-SD-NEXT:    fcvtzs v17.2d, v17.2d
+; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT:    fcvtzs v18.2d, v18.2d
+; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-SD-NEXT:    stp q7, q16, [x8, #224]
+; CHECK-SD-NEXT:    fcvtl2 v7.2d, v2.4s
+; CHECK-SD-NEXT:    fcvtzs v16.2d, v19.2d
+; CHECK-SD-NEXT:    stp q5, q18, [x8, #160]
+; CHECK-SD-NEXT:    fcvtl v2.2d, v2.2s
+; CHECK-SD-NEXT:    fcvtl2 v5.2d, v0.4s
+; CHECK-SD-NEXT:    stp q6, q17, [x8, #192]
+; CHECK-SD-NEXT:    fcvtl2 v6.2d, v1.4s
+; CHECK-SD-NEXT:    fcvtzs v17.2d, v20.2d
+; CHECK-SD-NEXT:    fcvtl v1.2d, v1.2s
+; CHECK-SD-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-SD-NEXT:    stp q4, q16, [x8, #128]
+; CHECK-SD-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzs v4.2d, v6.2d
+; CHECK-SD-NEXT:    stp q3, q17, [x8, #96]
+; CHECK-SD-NEXT:    fcvtzs v3.2d, v5.2d
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    stp q2, q7, [x8, #64]
+; CHECK-SD-NEXT:    stp q0, q3, [x8]
+; CHECK-SD-NEXT:    stp q1, q4, [x8, #32]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v32f32_v32i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtl v16.2d, v0.2s
+; CHECK-GI-NEXT:    fcvtl2 v0.2d, v0.4s
+; CHECK-GI-NEXT:    fcvtl v17.2d, v1.2s
+; CHECK-GI-NEXT:    fcvtl2 v1.2d, v1.4s
+; CHECK-GI-NEXT:    fcvtl v18.2d, v2.2s
+; CHECK-GI-NEXT:    fcvtl2 v2.2d, v2.4s
+; CHECK-GI-NEXT:    fcvtl v19.2d, v3.2s
+; CHECK-GI-NEXT:    fcvtl2 v3.2d, v3.4s
+; CHECK-GI-NEXT:    fcvtl v20.2d, v4.2s
+; CHECK-GI-NEXT:    fcvtl2 v4.2d, v4.4s
+; CHECK-GI-NEXT:    fcvtzs v16.2d, v16.2d
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v17.2d, v17.2d
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzs v18.2d, v18.2d
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-GI-NEXT:    stp q16, q0, [x8]
+; CHECK-GI-NEXT:    fcvtl v0.2d, v5.2s
+; CHECK-GI-NEXT:    fcvtl2 v5.2d, v5.4s
+; CHECK-GI-NEXT:    stp q17, q1, [x8, #32]
+; CHECK-GI-NEXT:    fcvtzs v16.2d, v19.2d
+; CHECK-GI-NEXT:    fcvtl v1.2d, v6.2s
+; CHECK-GI-NEXT:    stp q18, q2, [x8, #64]
+; CHECK-GI-NEXT:    fcvtl2 v6.2d, v6.4s
+; CHECK-GI-NEXT:    fcvtl v2.2d, v7.2s
+; CHECK-GI-NEXT:    fcvtl2 v7.2d, v7.4s
+; CHECK-GI-NEXT:    fcvtzs v17.2d, v20.2d
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-GI-NEXT:    stp q16, q3, [x8, #96]
+; CHECK-GI-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzs v3.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    stp q17, q4, [x8, #128]
+; CHECK-GI-NEXT:    stp q0, q5, [x8, #160]
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v7.2d
+; CHECK-GI-NEXT:    stp q1, q3, [x8, #192]
+; CHECK-GI-NEXT:    stp q2, q0, [x8, #224]
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <32 x float> %a to <32 x i64>
   ret <32 x i64> %c
 }
 
 define <32 x i64> @fptou_v32f32_v32i64(<32 x float> %a) {
-; CHECK-LABEL: fptou_v32f32_v32i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl2 v16.2d, v7.4s
-; CHECK-NEXT:    fcvtl v7.2d, v7.2s
-; CHECK-NEXT:    fcvtl2 v17.2d, v6.4s
-; CHECK-NEXT:    fcvtl v6.2d, v6.2s
-; CHECK-NEXT:    fcvtl2 v18.2d, v5.4s
-; CHECK-NEXT:    fcvtl v5.2d, v5.2s
-; CHECK-NEXT:    fcvtl2 v19.2d, v4.4s
-; CHECK-NEXT:    fcvtl v4.2d, v4.2s
-; CHECK-NEXT:    fcvtl2 v20.2d, v3.4s
-; CHECK-NEXT:    fcvtl v3.2d, v3.2s
-; CHECK-NEXT:    fcvtzu v16.2d, v16.2d
-; CHECK-NEXT:    fcvtzu v7.2d, v7.2d
-; CHECK-NEXT:    fcvtzu v17.2d, v17.2d
-; CHECK-NEXT:    fcvtzu v6.2d, v6.2d
-; CHECK-NEXT:    fcvtzu v18.2d, v18.2d
-; CHECK-NEXT:    fcvtzu v5.2d, v5.2d
-; CHECK-NEXT:    fcvtzu v4.2d, v4.2d
-; CHECK-NEXT:    fcvtzu v3.2d, v3.2d
-; CHECK-NEXT:    stp q7, q16, [x8, #224]
-; CHECK-NEXT:    fcvtl2 v7.2d, v2.4s
-; CHECK-NEXT:    fcvtzu v16.2d, v19.2d
-; CHECK-NEXT:    stp q5, q18, [x8, #160]
-; CHECK-NEXT:    fcvtl v2.2d, v2.2s
-; CHECK-NEXT:    fcvtl2 v5.2d, v0.4s
-; CHECK-NEXT:    stp q6, q17, [x8, #192]
-; CHECK-NEXT:    fcvtl2 v6.2d, v1.4s
-; CHECK-NEXT:    fcvtzu v17.2d, v20.2d
-; CHECK-NEXT:    fcvtl v1.2d, v1.2s
-; CHECK-NEXT:    fcvtl v0.2d, v0.2s
-; CHECK-NEXT:    stp q4, q16, [x8, #128]
-; CHECK-NEXT:    fcvtzu v7.2d, v7.2d
-; CHECK-NEXT:    fcvtzu v2.2d, v2.2d
-; CHECK-NEXT:    fcvtzu v4.2d, v6.2d
-; CHECK-NEXT:    stp q3, q17, [x8, #96]
-; CHECK-NEXT:    fcvtzu v3.2d, v5.2d
-; CHECK-NEXT:    fcvtzu v1.2d, v1.2d
-; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
-; CHECK-NEXT:    stp q2, q7, [x8, #64]
-; CHECK-NEXT:    stp q0, q3, [x8]
-; CHECK-NEXT:    stp q1, q4, [x8, #32]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v32f32_v32i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl2 v16.2d, v7.4s
+; CHECK-SD-NEXT:    fcvtl v7.2d, v7.2s
+; CHECK-SD-NEXT:    fcvtl2 v17.2d, v6.4s
+; CHECK-SD-NEXT:    fcvtl v6.2d, v6.2s
+; CHECK-SD-NEXT:    fcvtl2 v18.2d, v5.4s
+; CHECK-SD-NEXT:    fcvtl v5.2d, v5.2s
+; CHECK-SD-NEXT:    fcvtl2 v19.2d, v4.4s
+; CHECK-SD-NEXT:    fcvtl v4.2d, v4.2s
+; CHECK-SD-NEXT:    fcvtl2 v20.2d, v3.4s
+; CHECK-SD-NEXT:    fcvtl v3.2d, v3.2s
+; CHECK-SD-NEXT:    fcvtzu v16.2d, v16.2d
+; CHECK-SD-NEXT:    fcvtzu v7.2d, v7.2d
+; CHECK-SD-NEXT:    fcvtzu v17.2d, v17.2d
+; CHECK-SD-NEXT:    fcvtzu v6.2d, v6.2d
+; CHECK-SD-NEXT:    fcvtzu v18.2d, v18.2d
+; CHECK-SD-NEXT:    fcvtzu v5.2d, v5.2d
+; CHECK-SD-NEXT:    fcvtzu v4.2d, v4.2d
+; CHECK-SD-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-SD-NEXT:    stp q7, q16, [x8, #224]
+; CHECK-SD-NEXT:    fcvtl2 v7.2d, v2.4s
+; CHECK-SD-NEXT:    fcvtzu v16.2d, v19.2d
+; CHECK-SD-NEXT:    stp q5, q18, [x8, #160]
+; CHECK-SD-NEXT:    fcvtl v2.2d, v2.2s
+; CHECK-SD-NEXT:    fcvtl2 v5.2d, v0.4s
+; CHECK-SD-NEXT:    stp q6, q17, [x8, #192]
+; CHECK-SD-NEXT:    fcvtl2 v6.2d, v1.4s
+; CHECK-SD-NEXT:    fcvtzu v17.2d, v20.2d
+; CHECK-SD-NEXT:    fcvtl v1.2d, v1.2s
+; CHECK-SD-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-SD-NEXT:    stp q4, q16, [x8, #128]
+; CHECK-SD-NEXT:    fcvtzu v7.2d, v7.2d
+; CHECK-SD-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-SD-NEXT:    fcvtzu v4.2d, v6.2d
+; CHECK-SD-NEXT:    stp q3, q17, [x8, #96]
+; CHECK-SD-NEXT:    fcvtzu v3.2d, v5.2d
+; CHECK-SD-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-SD-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-SD-NEXT:    stp q2, q7, [x8, #64]
+; CHECK-SD-NEXT:    stp q0, q3, [x8]
+; CHECK-SD-NEXT:    stp q1, q4, [x8, #32]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v32f32_v32i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtl v16.2d, v0.2s
+; CHECK-GI-NEXT:    fcvtl2 v0.2d, v0.4s
+; CHECK-GI-NEXT:    fcvtl v17.2d, v1.2s
+; CHECK-GI-NEXT:    fcvtl2 v1.2d, v1.4s
+; CHECK-GI-NEXT:    fcvtl v18.2d, v2.2s
+; CHECK-GI-NEXT:    fcvtl2 v2.2d, v2.4s
+; CHECK-GI-NEXT:    fcvtl v19.2d, v3.2s
+; CHECK-GI-NEXT:    fcvtl2 v3.2d, v3.4s
+; CHECK-GI-NEXT:    fcvtl v20.2d, v4.2s
+; CHECK-GI-NEXT:    fcvtl2 v4.2d, v4.4s
+; CHECK-GI-NEXT:    fcvtzu v16.2d, v16.2d
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v17.2d, v17.2d
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzu v18.2d, v18.2d
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v3.2d
+; CHECK-GI-NEXT:    fcvtzu v4.2d, v4.2d
+; CHECK-GI-NEXT:    stp q16, q0, [x8]
+; CHECK-GI-NEXT:    fcvtl v0.2d, v5.2s
+; CHECK-GI-NEXT:    fcvtl2 v5.2d, v5.4s
+; CHECK-GI-NEXT:    stp q17, q1, [x8, #32]
+; CHECK-GI-NEXT:    fcvtzu v16.2d, v19.2d
+; CHECK-GI-NEXT:    fcvtl v1.2d, v6.2s
+; CHECK-GI-NEXT:    stp q18, q2, [x8, #64]
+; CHECK-GI-NEXT:    fcvtl2 v6.2d, v6.4s
+; CHECK-GI-NEXT:    fcvtl v2.2d, v7.2s
+; CHECK-GI-NEXT:    fcvtl2 v7.2d, v7.4s
+; CHECK-GI-NEXT:    fcvtzu v17.2d, v20.2d
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NEXT:    fcvtzu v5.2d, v5.2d
+; CHECK-GI-NEXT:    stp q16, q3, [x8, #96]
+; CHECK-GI-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT:    fcvtzu v3.2d, v6.2d
+; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    stp q17, q4, [x8, #128]
+; CHECK-GI-NEXT:    stp q0, q5, [x8, #160]
+; CHECK-GI-NEXT:    fcvtzu v0.2d, v7.2d
+; CHECK-GI-NEXT:    stp q1, q3, [x8, #192]
+; CHECK-GI-NEXT:    stp q2, q0, [x8, #224]
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <32 x float> %a to <32 x i64>
   ret <32 x i64> %c
@@ -1986,32 +2680,59 @@ entry:
 }
 
 define <2 x i16> @fptou_v2f32_v2i16(<2 x float> %a) {
-; CHECK-LABEL: fptou_v2f32_v2i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v2f32_v2i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v2f32_v2i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.2s, v0.2s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <2 x float> %a to <2 x i16>
   ret <2 x i16> %c
 }
 
 define <3 x i16> @fptos_v3f32_v3i16(<3 x float> %a) {
-; CHECK-LABEL: fptos_v3f32_v3i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v3f32_v3i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v3f32_v3i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-NEXT:    mov s2, v0.s[2]
+; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-GI-NEXT:    mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <3 x float> %a to <3 x i16>
   ret <3 x i16> %c
 }
 
 define <3 x i16> @fptou_v3f32_v3i16(<3 x float> %a) {
-; CHECK-LABEL: fptou_v3f32_v3i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v3f32_v3i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v3f32_v3i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-NEXT:    mov s2, v0.s[2]
+; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-GI-NEXT:    mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <3 x float> %a to <3 x i16>
   ret <3 x i16> %c
@@ -2040,96 +2761,162 @@ entry:
 }
 
 define <8 x i16> @fptos_v8f32_v8i16(<8 x float> %a) {
-; CHECK-LABEL: fptos_v8f32_v8i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v8f32_v8i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v8f32_v8i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <8 x float> %a to <8 x i16>
   ret <8 x i16> %c
 }
 
 define <8 x i16> @fptou_v8f32_v8i16(<8 x float> %a) {
-; CHECK-LABEL: fptou_v8f32_v8i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v8f32_v8i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v8f32_v8i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <8 x float> %a to <8 x i16>
   ret <8 x i16> %c
 }
 
 define <16 x i16> @fptos_v16f32_v16i16(<16 x float> %a) {
-; CHECK-LABEL: fptos_v16f32_v16i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    fcvtzs v3.4s, v3.4s
-; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v16f32_v16i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    fcvtzs v3.4s, v3.4s
+; CHECK-SD-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-SD-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v16f32_v16i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-GI-NEXT:    fcvtzs v3.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <16 x float> %a to <16 x i16>
   ret <16 x i16> %c
 }
 
 define <16 x i16> @fptou_v16f32_v16i16(<16 x float> %a) {
-; CHECK-LABEL: fptou_v16f32_v16i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    fcvtzu v3.4s, v3.4s
-; CHECK-NEXT:    fcvtzu v2.4s, v2.4s
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v16f32_v16i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    fcvtzu v3.4s, v3.4s
+; CHECK-SD-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-SD-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v16f32_v16i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-GI-NEXT:    fcvtzu v3.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <16 x float> %a to <16 x i16>
   ret <16 x i16> %c
 }
 
 define <32 x i16> @fptos_v32f32_v32i16(<32 x float> %a) {
-; CHECK-LABEL: fptos_v32f32_v32i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v3.4s, v3.4s
-; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-NEXT:    fcvtzs v5.4s, v5.4s
-; CHECK-NEXT:    fcvtzs v4.4s, v4.4s
-; CHECK-NEXT:    fcvtzs v7.4s, v7.4s
-; CHECK-NEXT:    fcvtzs v6.4s, v6.4s
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
-; CHECK-NEXT:    uzp1 v2.8h, v4.8h, v5.8h
-; CHECK-NEXT:    uzp1 v3.8h, v6.8h, v7.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v32f32_v32i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v3.4s, v3.4s
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-SD-NEXT:    fcvtzs v5.4s, v5.4s
+; CHECK-SD-NEXT:    fcvtzs v4.4s, v4.4s
+; CHECK-SD-NEXT:    fcvtzs v7.4s, v7.4s
+; CHECK-SD-NEXT:    fcvtzs v6.4s, v6.4s
+; CHECK-SD-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT:    uzp1 v2.8h, v4.8h, v5.8h
+; CHECK-SD-NEXT:    uzp1 v3.8h, v6.8h, v7.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v32f32_v32i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-GI-NEXT:    fcvtzs v3.4s, v3.4s
+; CHECK-GI-NEXT:    fcvtzs v4.4s, v4.4s
+; CHECK-GI-NEXT:    fcvtzs v5.4s, v5.4s
+; CHECK-GI-NEXT:    fcvtzs v6.4s, v6.4s
+; CHECK-GI-NEXT:    fcvtzs v7.4s, v7.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    uzp1 v2.8h, v4.8h, v5.8h
+; CHECK-GI-NEXT:    uzp1 v3.8h, v6.8h, v7.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <32 x float> %a to <32 x i16>
   ret <32 x i16> %c
 }
 
 define <32 x i16> @fptou_v32f32_v32i16(<32 x float> %a) {
-; CHECK-LABEL: fptou_v32f32_v32i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu v3.4s, v3.4s
-; CHECK-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    fcvtzu v2.4s, v2.4s
-; CHECK-NEXT:    fcvtzu v5.4s, v5.4s
-; CHECK-NEXT:    fcvtzu v4.4s, v4.4s
-; CHECK-NEXT:    fcvtzu v7.4s, v7.4s
-; CHECK-NEXT:    fcvtzu v6.4s, v6.4s
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
-; CHECK-NEXT:    uzp1 v2.8h, v4.8h, v5.8h
-; CHECK-NEXT:    uzp1 v3.8h, v6.8h, v7.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v32f32_v32i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzu v3.4s, v3.4s
+; CHECK-SD-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-SD-NEXT:    fcvtzu v5.4s, v5.4s
+; CHECK-SD-NEXT:    fcvtzu v4.4s, v4.4s
+; CHECK-SD-NEXT:    fcvtzu v7.4s, v7.4s
+; CHECK-SD-NEXT:    fcvtzu v6.4s, v6.4s
+; CHECK-SD-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT:    uzp1 v2.8h, v4.8h, v5.8h
+; CHECK-SD-NEXT:    uzp1 v3.8h, v6.8h, v7.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v32f32_v32i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-GI-NEXT:    fcvtzu v3.4s, v3.4s
+; CHECK-GI-NEXT:    fcvtzu v4.4s, v4.4s
+; CHECK-GI-NEXT:    fcvtzu v5.4s, v5.4s
+; CHECK-GI-NEXT:    fcvtzu v6.4s, v6.4s
+; CHECK-GI-NEXT:    fcvtzu v7.4s, v7.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    uzp1 v2.8h, v4.8h, v5.8h
+; CHECK-GI-NEXT:    uzp1 v3.8h, v6.8h, v7.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <32 x float> %a to <32 x i16>
   ret <32 x i16> %c
@@ -2146,38 +2933,63 @@ entry:
 }
 
 define <2 x i8> @fptou_v2f32_v2i8(<2 x float> %a) {
-; CHECK-LABEL: fptou_v2f32_v2i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v2f32_v2i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v2f32_v2i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.2s, v0.2s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <2 x float> %a to <2 x i8>
   ret <2 x i8> %c
 }
 
 define <3 x i8> @fptos_v3f32_v3i8(<3 x float> %a) {
-; CHECK-LABEL: fptos_v3f32_v3i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    umov w0, v0.h[0]
-; CHECK-NEXT:    umov w1, v0.h[1]
-; CHECK-NEXT:    umov w2, v0.h[2]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v3f32_v3i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    umov w0, v0.h[0]
+; CHECK-SD-NEXT:    umov w1, v0.h[1]
+; CHECK-SD-NEXT:    umov w2, v0.h[2]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v3f32_v3i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-NEXT:    mov s2, v0.s[2]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    fmov w1, s1
+; CHECK-GI-NEXT:    fmov w2, s2
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <3 x float> %a to <3 x i8>
   ret <3 x i8> %c
 }
 
 define <3 x i8> @fptou_v3f32_v3i8(<3 x float> %a) {
-; CHECK-LABEL: fptou_v3f32_v3i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    umov w0, v0.h[0]
-; CHECK-NEXT:    umov w1, v0.h[1]
-; CHECK-NEXT:    umov w2, v0.h[2]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v3f32_v3i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    umov w0, v0.h[0]
+; CHECK-SD-NEXT:    umov w1, v0.h[1]
+; CHECK-SD-NEXT:    umov w2, v0.h[2]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v3f32_v3i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-NEXT:    mov s2, v0.s[2]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    fmov w1, s1
+; CHECK-GI-NEXT:    fmov w2, s2
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <3 x float> %a to <3 x i8>
   ret <3 x i8> %c
@@ -2195,129 +3007,209 @@ entry:
 }
 
 define <4 x i8> @fptou_v4f32_v4i8(<4 x float> %a) {
-; CHECK-LABEL: fptou_v4f32_v4i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v4f32_v4i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v4f32_v4i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <4 x float> %a to <4 x i8>
   ret <4 x i8> %c
 }
 
 define <8 x i8> @fptos_v8f32_v8i8(<8 x float> %a) {
-; CHECK-LABEL: fptos_v8f32_v8i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    xtn v1.4h, v1.4s
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    uzp1 v0.8b, v0.8b, v1.8b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v8f32_v8i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    xtn v1.4h, v1.4s
+; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    uzp1 v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v8f32_v8i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    xtn v0.8b, v0.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <8 x float> %a to <8 x i8>
   ret <8 x i8> %c
 }
 
 define <8 x i8> @fptou_v8f32_v8i8(<8 x float> %a) {
-; CHECK-LABEL: fptou_v8f32_v8i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    xtn v1.4h, v1.4s
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    uzp1 v0.8b, v0.8b, v1.8b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v8f32_v8i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    xtn v1.4h, v1.4s
+; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    uzp1 v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v8f32_v8i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    xtn v0.8b, v0.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <8 x float> %a to <8 x i8>
   ret <8 x i8> %c
 }
 
 define <16 x i8> @fptos_v16f32_v16i8(<16 x float> %a) {
-; CHECK-LABEL: fptos_v16f32_v16i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v3.4s, v3.4s
-; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    xtn v3.4h, v3.4s
-; CHECK-NEXT:    xtn v2.4h, v2.4s
-; CHECK-NEXT:    xtn v1.4h, v1.4s
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    mov v2.d[1], v3.d[0]
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v16f32_v16i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v3.4s, v3.4s
+; CHECK-SD-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    xtn v3.4h, v3.4s
+; CHECK-SD-NEXT:    xtn v2.4h, v2.4s
+; CHECK-SD-NEXT:    xtn v1.4h, v1.4s
+; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    mov v2.d[1], v3.d[0]
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v16f32_v16i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-GI-NEXT:    fcvtzs v3.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <16 x float> %a to <16 x i8>
   ret <16 x i8> %c
 }
 
 define <16 x i8> @fptou_v16f32_v16i8(<16 x float> %a) {
-; CHECK-LABEL: fptou_v16f32_v16i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v3.4s, v3.4s
-; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v16f32_v16i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v3.4s, v3.4s
+; CHECK-SD-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v16f32_v16i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-GI-NEXT:    fcvtzu v3.4s, v3.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <16 x float> %a to <16 x i8>
   ret <16 x i8> %c
 }
 
 define <32 x i8> @fptos_v32f32_v32i8(<32 x float> %a) {
-; CHECK-LABEL: fptos_v32f32_v32i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v3.4s, v3.4s
-; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    fcvtzs v7.4s, v7.4s
-; CHECK-NEXT:    fcvtzs v6.4s, v6.4s
-; CHECK-NEXT:    fcvtzs v5.4s, v5.4s
-; CHECK-NEXT:    fcvtzs v4.4s, v4.4s
-; CHECK-NEXT:    xtn v3.4h, v3.4s
-; CHECK-NEXT:    xtn v2.4h, v2.4s
-; CHECK-NEXT:    xtn v1.4h, v1.4s
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    xtn v7.4h, v7.4s
-; CHECK-NEXT:    xtn v6.4h, v6.4s
-; CHECK-NEXT:    xtn v5.4h, v5.4s
-; CHECK-NEXT:    xtn v4.4h, v4.4s
-; CHECK-NEXT:    mov v2.d[1], v3.d[0]
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-NEXT:    mov v6.d[1], v7.d[0]
-; CHECK-NEXT:    mov v4.d[1], v5.d[0]
-; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    uzp1 v1.16b, v4.16b, v6.16b
-; CHECK-NEXT:    ret
-entry:
-  %c = fptosi <32 x float> %a to <32 x i8>
-  ret <32 x i8> %c
-}
-
-define <32 x i8> @fptou_v32f32_v32i8(<32 x float> %a) {
-; CHECK-LABEL: fptou_v32f32_v32i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs v3.4s, v3.4s
-; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    fcvtzs v7.4s, v7.4s
-; CHECK-NEXT:    fcvtzs v6.4s, v6.4s
-; CHECK-NEXT:    fcvtzs v5.4s, v5.4s
-; CHECK-NEXT:    fcvtzs v4.4s, v4.4s
-; CHECK-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    uzp1 v1.8h, v6.8h, v7.8h
-; CHECK-NEXT:    uzp1 v3.8h, v4.8h, v5.8h
-; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    uzp1 v1.16b, v3.16b, v1.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v32f32_v32i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v3.4s, v3.4s
+; CHECK-SD-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    fcvtzs v7.4s, v7.4s
+; CHECK-SD-NEXT:    fcvtzs v6.4s, v6.4s
+; CHECK-SD-NEXT:    fcvtzs v5.4s, v5.4s
+; CHECK-SD-NEXT:    fcvtzs v4.4s, v4.4s
+; CHECK-SD-NEXT:    xtn v3.4h, v3.4s
+; CHECK-SD-NEXT:    xtn v2.4h, v2.4s
+; CHECK-SD-NEXT:    xtn v1.4h, v1.4s
+; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    xtn v7.4h, v7.4s
+; CHECK-SD-NEXT:    xtn v6.4h, v6.4s
+; CHECK-SD-NEXT:    xtn v5.4h, v5.4s
+; CHECK-SD-NEXT:    xtn v4.4h, v4.4s
+; CHECK-SD-NEXT:    mov v2.d[1], v3.d[0]
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
+; CHECK-SD-NEXT:    mov v4.d[1], v5.d[0]
+; CHECK-SD-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT:    uzp1 v1.16b, v4.16b, v6.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v32f32_v32i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-GI-NEXT:    fcvtzs v3.4s, v3.4s
+; CHECK-GI-NEXT:    fcvtzs v4.4s, v4.4s
+; CHECK-GI-NEXT:    fcvtzs v5.4s, v5.4s
+; CHECK-GI-NEXT:    fcvtzs v6.4s, v6.4s
+; CHECK-GI-NEXT:    fcvtzs v7.4s, v7.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    uzp1 v2.8h, v4.8h, v5.8h
+; CHECK-GI-NEXT:    uzp1 v3.8h, v6.8h, v7.8h
+; CHECK-GI-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    uzp1 v1.16b, v2.16b, v3.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = fptosi <32 x float> %a to <32 x i8>
+  ret <32 x i8> %c
+}
+
+define <32 x i8> @fptou_v32f32_v32i8(<32 x float> %a) {
+; CHECK-SD-LABEL: fptou_v32f32_v32i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs v3.4s, v3.4s
+; CHECK-SD-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    fcvtzs v7.4s, v7.4s
+; CHECK-SD-NEXT:    fcvtzs v6.4s, v6.4s
+; CHECK-SD-NEXT:    fcvtzs v5.4s, v5.4s
+; CHECK-SD-NEXT:    fcvtzs v4.4s, v4.4s
+; CHECK-SD-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    uzp1 v1.8h, v6.8h, v7.8h
+; CHECK-SD-NEXT:    uzp1 v3.8h, v4.8h, v5.8h
+; CHECK-SD-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT:    uzp1 v1.16b, v3.16b, v1.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v32f32_v32i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-GI-NEXT:    fcvtzu v3.4s, v3.4s
+; CHECK-GI-NEXT:    fcvtzu v4.4s, v4.4s
+; CHECK-GI-NEXT:    fcvtzu v5.4s, v5.4s
+; CHECK-GI-NEXT:    fcvtzu v6.4s, v6.4s
+; CHECK-GI-NEXT:    fcvtzu v7.4s, v7.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    uzp1 v2.8h, v4.8h, v5.8h
+; CHECK-GI-NEXT:    uzp1 v3.8h, v6.8h, v7.8h
+; CHECK-GI-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    uzp1 v1.16b, v2.16b, v3.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <32 x float> %a to <32 x i8>
   ret <32 x i8> %c
@@ -2348,24 +3240,26 @@ define <2 x i64> @fptos_v2f16_v2i64(<2 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i64:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NOFP16-NEXT:    fmov x8, d0
+; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
 ; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x8, s0
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x9, s1
-; CHECK-GI-NOFP16-NEXT:    fmov d0, x8
-; CHECK-GI-NOFP16-NEXT:    mov v0.d[1], x9
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-FP16-NEXT:    fmov x8, d0
+; CHECK-GI-FP16-NEXT:    fmov s0, w8
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT:    fcvtzs x8, h0
-; CHECK-GI-FP16-NEXT:    fcvtzs x9, h1
-; CHECK-GI-FP16-NEXT:    fmov d0, x8
-; CHECK-GI-FP16-NEXT:    mov v0.d[1], x9
+; CHECK-GI-FP16-NEXT:    fcvt d0, h0
+; CHECK-GI-FP16-NEXT:    fcvt d1, h1
+; CHECK-GI-FP16-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi <2 x half> %a to <2 x i64>
@@ -2397,24 +3291,26 @@ define <2 x i64> @fptou_v2f16_v2i64(<2 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i64:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NOFP16-NEXT:    fmov x8, d0
+; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
 ; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x8, s0
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x9, s1
-; CHECK-GI-NOFP16-NEXT:    fmov d0, x8
-; CHECK-GI-NOFP16-NEXT:    mov v0.d[1], x9
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.2d, v0.2d
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-FP16-NEXT:    fmov x8, d0
+; CHECK-GI-FP16-NEXT:    fmov s0, w8
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT:    fcvtzu x8, h0
-; CHECK-GI-FP16-NEXT:    fcvtzu x9, h1
-; CHECK-GI-FP16-NEXT:    fmov d0, x8
-; CHECK-GI-FP16-NEXT:    mov v0.d[1], x9
+; CHECK-GI-FP16-NEXT:    fcvt d0, h0
+; CHECK-GI-FP16-NEXT:    fcvt d1, h1
+; CHECK-GI-FP16-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.2d, v0.2d
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui <2 x half> %a to <2 x i64>
@@ -2453,31 +3349,33 @@ define <3 x i64> @fptos_v3f16_v3i64(<3 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v3f16_v3i64:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x8, s0
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x9, s1
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x10, s2
-; CHECK-GI-NOFP16-NEXT:    fmov d0, x8
-; CHECK-GI-NOFP16-NEXT:    fmov d1, x9
-; CHECK-GI-NOFP16-NEXT:    fmov d2, x10
+; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    mov s1, v0.s[2]
+; CHECK-GI-NOFP16-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v1.2d, v1.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v2.2d, v1.2d
+; CHECK-GI-NOFP16-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v3f16_v3i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
 ; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT:    fcvtzs x8, h0
-; CHECK-GI-FP16-NEXT:    fcvtzs x9, h1
-; CHECK-GI-FP16-NEXT:    fcvtzs x10, h2
-; CHECK-GI-FP16-NEXT:    fmov d0, x8
-; CHECK-GI-FP16-NEXT:    fmov d1, x9
-; CHECK-GI-FP16-NEXT:    fmov d2, x10
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[1]
+; CHECK-GI-FP16-NEXT:    fcvt d1, h0
+; CHECK-GI-FP16-NEXT:    mov h3, v0.h[2]
+; CHECK-GI-FP16-NEXT:    fcvt d0, h0
+; CHECK-GI-FP16-NEXT:    fcvt d2, h2
+; CHECK-GI-FP16-NEXT:    mov v0.d[1], v2.d[0]
+; CHECK-GI-FP16-NEXT:    fcvt d2, h3
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-FP16-NEXT:    mov v2.d[1], v1.d[0]
+; CHECK-GI-FP16-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-FP16-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-FP16-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi <3 x half> %a to <3 x i64>
@@ -2516,31 +3414,33 @@ define <3 x i64> @fptou_v3f16_v3i64(<3 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v3f16_v3i64:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x8, s0
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x9, s1
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x10, s2
-; CHECK-GI-NOFP16-NEXT:    fmov d0, x8
-; CHECK-GI-NOFP16-NEXT:    fmov d1, x9
-; CHECK-GI-NOFP16-NEXT:    fmov d2, x10
+; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    mov s1, v0.s[2]
+; CHECK-GI-NOFP16-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v1.2d, v1.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v2.2d, v1.2d
+; CHECK-GI-NOFP16-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v3f16_v3i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
 ; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT:    fcvtzu x8, h0
-; CHECK-GI-FP16-NEXT:    fcvtzu x9, h1
-; CHECK-GI-FP16-NEXT:    fcvtzu x10, h2
-; CHECK-GI-FP16-NEXT:    fmov d0, x8
-; CHECK-GI-FP16-NEXT:    fmov d1, x9
-; CHECK-GI-FP16-NEXT:    fmov d2, x10
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[1]
+; CHECK-GI-FP16-NEXT:    fcvt d1, h0
+; CHECK-GI-FP16-NEXT:    mov h3, v0.h[2]
+; CHECK-GI-FP16-NEXT:    fcvt d0, h0
+; CHECK-GI-FP16-NEXT:    fcvt d2, h2
+; CHECK-GI-FP16-NEXT:    mov v0.d[1], v2.d[0]
+; CHECK-GI-FP16-NEXT:    fcvt d2, h3
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-FP16-NEXT:    mov v2.d[1], v1.d[0]
+; CHECK-GI-FP16-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-FP16-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-FP16-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui <3 x half> %a to <3 x i64>
@@ -2586,38 +3486,27 @@ define <4 x i64> @fptos_v4f16_v4i64(<4 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v4f16_v4i64:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x8, s0
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x9, s1
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x10, s2
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x11, s3
-; CHECK-GI-NOFP16-NEXT:    fmov d0, x8
-; CHECK-GI-NOFP16-NEXT:    fmov d1, x9
-; CHECK-GI-NOFP16-NEXT:    mov v0.d[1], x10
-; CHECK-GI-NOFP16-NEXT:    mov v1.d[1], x11
+; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v1.2d, v0.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.2d, v0.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.2d, v1.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v1.2d, v2.2d
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v4f16_v4i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
 ; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT:    mov h1, v0.h[2]
+; CHECK-GI-FP16-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[1]
-; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzs x8, h0
-; CHECK-GI-FP16-NEXT:    fcvtzs x9, h1
-; CHECK-GI-FP16-NEXT:    fcvtzs x10, h2
-; CHECK-GI-FP16-NEXT:    fcvtzs x11, h3
-; CHECK-GI-FP16-NEXT:    fmov d0, x8
-; CHECK-GI-FP16-NEXT:    fmov d1, x9
-; CHECK-GI-FP16-NEXT:    mov v0.d[1], x10
-; CHECK-GI-FP16-NEXT:    mov v1.d[1], x11
+; CHECK-GI-FP16-NEXT:    fcvt d0, h0
+; CHECK-GI-FP16-NEXT:    mov h3, v1.h[1]
+; CHECK-GI-FP16-NEXT:    fcvt d2, h2
+; CHECK-GI-FP16-NEXT:    fcvt d1, h1
+; CHECK-GI-FP16-NEXT:    fcvt d3, h3
+; CHECK-GI-FP16-NEXT:    mov v0.d[1], v2.d[0]
+; CHECK-GI-FP16-NEXT:    mov v1.d[1], v3.d[0]
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v1.2d, v1.2d
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi <4 x half> %a to <4 x i64>
@@ -2663,38 +3552,27 @@ define <4 x i64> @fptou_v4f16_v4i64(<4 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v4f16_v4i64:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x8, s0
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x9, s1
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x10, s2
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x11, s3
-; CHECK-GI-NOFP16-NEXT:    fmov d0, x8
-; CHECK-GI-NOFP16-NEXT:    fmov d1, x9
-; CHECK-GI-NOFP16-NEXT:    mov v0.d[1], x10
-; CHECK-GI-NOFP16-NEXT:    mov v1.d[1], x11
+; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v1.2d, v0.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.2d, v0.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.2d, v1.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v1.2d, v2.2d
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v4f16_v4i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
 ; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT:    mov h1, v0.h[2]
+; CHECK-GI-FP16-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[1]
-; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzu x8, h0
-; CHECK-GI-FP16-NEXT:    fcvtzu x9, h1
-; CHECK-GI-FP16-NEXT:    fcvtzu x10, h2
-; CHECK-GI-FP16-NEXT:    fcvtzu x11, h3
-; CHECK-GI-FP16-NEXT:    fmov d0, x8
-; CHECK-GI-FP16-NEXT:    fmov d1, x9
-; CHECK-GI-FP16-NEXT:    mov v0.d[1], x10
-; CHECK-GI-FP16-NEXT:    mov v1.d[1], x11
+; CHECK-GI-FP16-NEXT:    fcvt d0, h0
+; CHECK-GI-FP16-NEXT:    mov h3, v1.h[1]
+; CHECK-GI-FP16-NEXT:    fcvt d2, h2
+; CHECK-GI-FP16-NEXT:    fcvt d1, h1
+; CHECK-GI-FP16-NEXT:    fcvt d3, h3
+; CHECK-GI-FP16-NEXT:    mov v0.d[1], v2.d[0]
+; CHECK-GI-FP16-NEXT:    mov v1.d[1], v3.d[0]
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v1.2d, v1.2d
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui <4 x half> %a to <4 x i64>
@@ -2766,64 +3644,43 @@ define <8 x i64> @fptos_v8f16_v8i64(<8 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v8f16_v8i64:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h7, v0.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h5, v1.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h6, v1.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x9, s0
-; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x8, s1
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x12, s4
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x11, s3
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x15, s7
-; CHECK-GI-NOFP16-NEXT:    fmov d0, x9
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x10, s2
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x13, s5
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x14, s6
-; CHECK-GI-NOFP16-NEXT:    fmov d2, x8
-; CHECK-GI-NOFP16-NEXT:    fmov d1, x12
-; CHECK-GI-NOFP16-NEXT:    mov v0.d[1], x11
-; CHECK-GI-NOFP16-NEXT:    fmov d3, x10
-; CHECK-GI-NOFP16-NEXT:    mov v2.d[1], x13
-; CHECK-GI-NOFP16-NEXT:    mov v1.d[1], x15
-; CHECK-GI-NOFP16-NEXT:    mov v3.d[1], x14
+; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v2.2d, v1.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.2d, v1.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v3.2d, v0.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v4.2d, v0.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.2d, v2.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v2.2d, v3.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v3.2d, v4.2d
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v8f16_v8i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-GI-FP16-NEXT:    mov h4, v0.h[2]
-; CHECK-GI-FP16-NEXT:    mov h3, v0.h[1]
-; CHECK-GI-FP16-NEXT:    mov h7, v0.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzs x9, h0
-; CHECK-GI-FP16-NEXT:    mov h2, v1.h[2]
+; CHECK-GI-FP16-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-FP16-NEXT:    mov s2, v0.s[2]
+; CHECK-GI-FP16-NEXT:    mov s3, v0.s[3]
+; CHECK-GI-FP16-NEXT:    mov h4, v0.h[1]
+; CHECK-GI-FP16-NEXT:    fcvt d0, h0
 ; CHECK-GI-FP16-NEXT:    mov h5, v1.h[1]
-; CHECK-GI-FP16-NEXT:    mov h6, v1.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzs x8, h1
-; CHECK-GI-FP16-NEXT:    fcvtzs x12, h4
-; CHECK-GI-FP16-NEXT:    fcvtzs x11, h3
-; CHECK-GI-FP16-NEXT:    fcvtzs x15, h7
-; CHECK-GI-FP16-NEXT:    fmov d0, x9
-; CHECK-GI-FP16-NEXT:    fcvtzs x10, h2
-; CHECK-GI-FP16-NEXT:    fcvtzs x13, h5
-; CHECK-GI-FP16-NEXT:    fcvtzs x14, h6
-; CHECK-GI-FP16-NEXT:    fmov d2, x8
-; CHECK-GI-FP16-NEXT:    fmov d1, x12
-; CHECK-GI-FP16-NEXT:    mov v0.d[1], x11
-; CHECK-GI-FP16-NEXT:    fmov d3, x10
-; CHECK-GI-FP16-NEXT:    mov v2.d[1], x13
-; CHECK-GI-FP16-NEXT:    mov v1.d[1], x15
-; CHECK-GI-FP16-NEXT:    mov v3.d[1], x14
+; CHECK-GI-FP16-NEXT:    mov h6, v2.h[1]
+; CHECK-GI-FP16-NEXT:    mov h7, v3.h[1]
+; CHECK-GI-FP16-NEXT:    fcvt d4, h4
+; CHECK-GI-FP16-NEXT:    fcvt d1, h1
+; CHECK-GI-FP16-NEXT:    fcvt d2, h2
+; CHECK-GI-FP16-NEXT:    fcvt d3, h3
+; CHECK-GI-FP16-NEXT:    fcvt d5, h5
+; CHECK-GI-FP16-NEXT:    fcvt d6, h6
+; CHECK-GI-FP16-NEXT:    fcvt d7, h7
+; CHECK-GI-FP16-NEXT:    mov v0.d[1], v4.d[0]
+; CHECK-GI-FP16-NEXT:    mov v1.d[1], v5.d[0]
+; CHECK-GI-FP16-NEXT:    mov v2.d[1], v6.d[0]
+; CHECK-GI-FP16-NEXT:    mov v3.d[1], v7.d[0]
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v3.2d, v3.2d
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi <8 x half> %a to <8 x i64>
@@ -2895,64 +3752,43 @@ define <8 x i64> @fptou_v8f16_v8i64(<8 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v8f16_v8i64:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h7, v0.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h5, v1.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h6, v1.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x9, s0
-; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x8, s1
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x12, s4
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x11, s3
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x15, s7
-; CHECK-GI-NOFP16-NEXT:    fmov d0, x9
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x10, s2
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x13, s5
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x14, s6
-; CHECK-GI-NOFP16-NEXT:    fmov d2, x8
-; CHECK-GI-NOFP16-NEXT:    fmov d1, x12
-; CHECK-GI-NOFP16-NEXT:    mov v0.d[1], x11
-; CHECK-GI-NOFP16-NEXT:    fmov d3, x10
-; CHECK-GI-NOFP16-NEXT:    mov v2.d[1], x13
-; CHECK-GI-NOFP16-NEXT:    mov v1.d[1], x15
-; CHECK-GI-NOFP16-NEXT:    mov v3.d[1], x14
+; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v2.2d, v1.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.2d, v1.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v3.2d, v0.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v4.2d, v0.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.2d, v2.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v2.2d, v3.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v3.2d, v4.2d
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v8f16_v8i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-GI-FP16-NEXT:    mov h4, v0.h[2]
-; CHECK-GI-FP16-NEXT:    mov h3, v0.h[1]
-; CHECK-GI-FP16-NEXT:    mov h7, v0.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzu x9, h0
-; CHECK-GI-FP16-NEXT:    mov h2, v1.h[2]
+; CHECK-GI-FP16-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-FP16-NEXT:    mov s2, v0.s[2]
+; CHECK-GI-FP16-NEXT:    mov s3, v0.s[3]
+; CHECK-GI-FP16-NEXT:    mov h4, v0.h[1]
+; CHECK-GI-FP16-NEXT:    fcvt d0, h0
 ; CHECK-GI-FP16-NEXT:    mov h5, v1.h[1]
-; CHECK-GI-FP16-NEXT:    mov h6, v1.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzu x8, h1
-; CHECK-GI-FP16-NEXT:    fcvtzu x12, h4
-; CHECK-GI-FP16-NEXT:    fcvtzu x11, h3
-; CHECK-GI-FP16-NEXT:    fcvtzu x15, h7
-; CHECK-GI-FP16-NEXT:    fmov d0, x9
-; CHECK-GI-FP16-NEXT:    fcvtzu x10, h2
-; CHECK-GI-FP16-NEXT:    fcvtzu x13, h5
-; CHECK-GI-FP16-NEXT:    fcvtzu x14, h6
-; CHECK-GI-FP16-NEXT:    fmov d2, x8
-; CHECK-GI-FP16-NEXT:    fmov d1, x12
-; CHECK-GI-FP16-NEXT:    mov v0.d[1], x11
-; CHECK-GI-FP16-NEXT:    fmov d3, x10
-; CHECK-GI-FP16-NEXT:    mov v2.d[1], x13
-; CHECK-GI-FP16-NEXT:    mov v1.d[1], x15
-; CHECK-GI-FP16-NEXT:    mov v3.d[1], x14
+; CHECK-GI-FP16-NEXT:    mov h6, v2.h[1]
+; CHECK-GI-FP16-NEXT:    mov h7, v3.h[1]
+; CHECK-GI-FP16-NEXT:    fcvt d4, h4
+; CHECK-GI-FP16-NEXT:    fcvt d1, h1
+; CHECK-GI-FP16-NEXT:    fcvt d2, h2
+; CHECK-GI-FP16-NEXT:    fcvt d3, h3
+; CHECK-GI-FP16-NEXT:    fcvt d5, h5
+; CHECK-GI-FP16-NEXT:    fcvt d6, h6
+; CHECK-GI-FP16-NEXT:    fcvt d7, h7
+; CHECK-GI-FP16-NEXT:    mov v0.d[1], v4.d[0]
+; CHECK-GI-FP16-NEXT:    mov v1.d[1], v5.d[0]
+; CHECK-GI-FP16-NEXT:    mov v2.d[1], v6.d[0]
+; CHECK-GI-FP16-NEXT:    mov v3.d[1], v7.d[0]
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v3.2d, v3.2d
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui <8 x half> %a to <8 x i64>
@@ -3078,118 +3914,76 @@ define <16 x i64> @fptos_v16f16_v16i64(<16 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v16f16_v16i64:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    ext v2.16b, v0.16b, v0.16b, #8
-; CHECK-GI-NOFP16-NEXT:    ext v3.16b, v1.16b, v1.16b, #8
-; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    fcvt s5, h0
-; CHECK-GI-NOFP16-NEXT:    mov h18, v0.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h2
-; CHECK-GI-NOFP16-NEXT:    mov h7, v2.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h16, v2.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h17, v3.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s19, h3
-; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x8, s5
-; CHECK-GI-NOFP16-NEXT:    mov h5, v1.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h2, v2.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s18, h18
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x9, s6
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h7
-; CHECK-GI-NOFP16-NEXT:    fcvt s7, h16
-; CHECK-GI-NOFP16-NEXT:    mov h16, v1.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s17, h17
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x10, s19
-; CHECK-GI-NOFP16-NEXT:    mov h19, v3.h[1]
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x11, s4
-; CHECK-GI-NOFP16-NEXT:    mov h4, v1.h[3]
-; CHECK-GI-NOFP16-NEXT:    mov h3, v3.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x13, s7
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x12, s6
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x15, s18
-; CHECK-GI-NOFP16-NEXT:    fcvt s7, h16
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x14, s17
-; CHECK-GI-NOFP16-NEXT:    fcvt s16, h2
-; CHECK-GI-NOFP16-NEXT:    fcvt s17, h19
-; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT:    fmov d2, x9
-; CHECK-GI-NOFP16-NEXT:    fcvt s19, h3
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x9, s1
-; CHECK-GI-NOFP16-NEXT:    fmov d6, x10
-; CHECK-GI-NOFP16-NEXT:    fmov d3, x13
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x13, s0
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x16, s5
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x10, s7
-; CHECK-GI-NOFP16-NEXT:    fmov d7, x14
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x14, s16
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x17, s17
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x0, s4
-; CHECK-GI-NOFP16-NEXT:    fmov d0, x8
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x18, s19
-; CHECK-GI-NOFP16-NEXT:    fmov d1, x15
-; CHECK-GI-NOFP16-NEXT:    fmov d4, x9
-; CHECK-GI-NOFP16-NEXT:    mov v2.d[1], x12
-; CHECK-GI-NOFP16-NEXT:    fmov d5, x10
-; CHECK-GI-NOFP16-NEXT:    mov v0.d[1], x11
-; CHECK-GI-NOFP16-NEXT:    mov v3.d[1], x14
-; CHECK-GI-NOFP16-NEXT:    mov v1.d[1], x13
-; CHECK-GI-NOFP16-NEXT:    mov v4.d[1], x16
-; CHECK-GI-NOFP16-NEXT:    mov v6.d[1], x17
-; CHECK-GI-NOFP16-NEXT:    mov v7.d[1], x18
-; CHECK-GI-NOFP16-NEXT:    mov v5.d[1], x0
+; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v3.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v4.2d, v2.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.2d, v2.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v5.2d, v0.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v6.2d, v0.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v7.2d, v3.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v16.2d, v3.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v17.2d, v1.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v18.2d, v1.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.2d, v4.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v1.2d, v2.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v2.2d, v5.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v3.2d, v6.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v4.2d, v7.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v5.2d, v16.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v6.2d, v17.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v7.2d, v18.2d
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v16f16_v16i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    ext v2.16b, v0.16b, v0.16b, #8
-; CHECK-GI-FP16-NEXT:    ext v3.16b, v1.16b, v1.16b, #8
-; CHECK-GI-FP16-NEXT:    mov h4, v0.h[1]
-; CHECK-GI-FP16-NEXT:    mov h5, v0.h[2]
-; CHECK-GI-FP16-NEXT:    fcvtzs x8, h0
-; CHECK-GI-FP16-NEXT:    mov h0, v0.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzs x9, h1
-; CHECK-GI-FP16-NEXT:    mov h7, v1.h[1]
-; CHECK-GI-FP16-NEXT:    mov h6, v2.h[2]
-; CHECK-GI-FP16-NEXT:    mov h16, v3.h[2]
-; CHECK-GI-FP16-NEXT:    fcvtzs x10, h4
-; CHECK-GI-FP16-NEXT:    mov h4, v1.h[2]
-; CHECK-GI-FP16-NEXT:    fcvtzs x11, h2
-; CHECK-GI-FP16-NEXT:    fcvtzs x12, h5
-; CHECK-GI-FP16-NEXT:    mov h5, v2.h[1]
-; CHECK-GI-FP16-NEXT:    mov h17, v2.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzs x13, h3
-; CHECK-GI-FP16-NEXT:    mov h18, v3.h[1]
-; CHECK-GI-FP16-NEXT:    mov h1, v1.h[3]
-; CHECK-GI-FP16-NEXT:    mov h19, v3.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzs x14, h6
-; CHECK-GI-FP16-NEXT:    fcvtzs x15, h16
-; CHECK-GI-FP16-NEXT:    fcvtzs x16, h0
-; CHECK-GI-FP16-NEXT:    fcvtzs x0, h4
-; CHECK-GI-FP16-NEXT:    fcvtzs x17, h7
-; CHECK-GI-FP16-NEXT:    fmov d2, x11
-; CHECK-GI-FP16-NEXT:    fcvtzs x11, h5
-; CHECK-GI-FP16-NEXT:    fcvtzs x18, h17
-; CHECK-GI-FP16-NEXT:    fmov d6, x13
-; CHECK-GI-FP16-NEXT:    fcvtzs x13, h18
-; CHECK-GI-FP16-NEXT:    fmov d0, x8
-; CHECK-GI-FP16-NEXT:    fmov d4, x9
-; CHECK-GI-FP16-NEXT:    fmov d3, x14
-; CHECK-GI-FP16-NEXT:    fmov d7, x15
-; CHECK-GI-FP16-NEXT:    fcvtzs x14, h19
-; CHECK-GI-FP16-NEXT:    fcvtzs x15, h1
-; CHECK-GI-FP16-NEXT:    fmov d1, x12
-; CHECK-GI-FP16-NEXT:    fmov d5, x0
-; CHECK-GI-FP16-NEXT:    mov v0.d[1], x10
-; CHECK-GI-FP16-NEXT:    mov v4.d[1], x17
-; CHECK-GI-FP16-NEXT:    mov v2.d[1], x11
-; CHECK-GI-FP16-NEXT:    mov v3.d[1], x18
-; CHECK-GI-FP16-NEXT:    mov v6.d[1], x13
-; CHECK-GI-FP16-NEXT:    mov v1.d[1], x16
-; CHECK-GI-FP16-NEXT:    mov v7.d[1], x14
-; CHECK-GI-FP16-NEXT:    mov v5.d[1], x15
+; CHECK-GI-FP16-NEXT:    mov h3, v0.h[1]
+; CHECK-GI-FP16-NEXT:    mov h4, v0.h[2]
+; CHECK-GI-FP16-NEXT:    mov h5, v0.h[3]
+; CHECK-GI-FP16-NEXT:    fcvt d2, h0
+; CHECK-GI-FP16-NEXT:    mov h6, v0.h[4]
+; CHECK-GI-FP16-NEXT:    mov h7, v0.h[5]
+; CHECK-GI-FP16-NEXT:    mov h16, v0.h[6]
+; CHECK-GI-FP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-FP16-NEXT:    mov h17, v1.h[1]
+; CHECK-GI-FP16-NEXT:    mov h18, v1.h[2]
+; CHECK-GI-FP16-NEXT:    mov h19, v1.h[3]
+; CHECK-GI-FP16-NEXT:    mov h20, v1.h[4]
+; CHECK-GI-FP16-NEXT:    mov h21, v1.h[5]
+; CHECK-GI-FP16-NEXT:    mov h22, v1.h[6]
+; CHECK-GI-FP16-NEXT:    mov h23, v1.h[7]
+; CHECK-GI-FP16-NEXT:    fcvt d3, h3
+; CHECK-GI-FP16-NEXT:    fcvt d4, h4
+; CHECK-GI-FP16-NEXT:    fcvt d5, h5
+; CHECK-GI-FP16-NEXT:    fcvt d6, h6
+; CHECK-GI-FP16-NEXT:    fcvt d7, h7
+; CHECK-GI-FP16-NEXT:    fcvt d16, h16
+; CHECK-GI-FP16-NEXT:    fcvt d0, h0
+; CHECK-GI-FP16-NEXT:    fcvt d24, h1
+; CHECK-GI-FP16-NEXT:    fcvt d1, h17
+; CHECK-GI-FP16-NEXT:    fcvt d17, h18
+; CHECK-GI-FP16-NEXT:    fcvt d18, h19
+; CHECK-GI-FP16-NEXT:    fcvt d19, h20
+; CHECK-GI-FP16-NEXT:    fcvt d20, h21
+; CHECK-GI-FP16-NEXT:    fcvt d21, h22
+; CHECK-GI-FP16-NEXT:    fcvt d22, h23
+; CHECK-GI-FP16-NEXT:    mov v2.d[1], v3.d[0]
+; CHECK-GI-FP16-NEXT:    mov v4.d[1], v5.d[0]
+; CHECK-GI-FP16-NEXT:    mov v6.d[1], v7.d[0]
+; CHECK-GI-FP16-NEXT:    mov v16.d[1], v0.d[0]
+; CHECK-GI-FP16-NEXT:    mov v24.d[1], v1.d[0]
+; CHECK-GI-FP16-NEXT:    mov v17.d[1], v18.d[0]
+; CHECK-GI-FP16-NEXT:    mov v19.d[1], v20.d[0]
+; CHECK-GI-FP16-NEXT:    mov v21.d[1], v22.d[0]
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.2d, v2.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v1.2d, v4.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v2.2d, v6.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v3.2d, v16.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v4.2d, v24.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v5.2d, v17.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v6.2d, v19.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v7.2d, v21.2d
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi <16 x half> %a to <16 x i64>
@@ -3315,118 +4109,76 @@ define <16 x i64> @fptou_v16f16_v16i64(<16 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v16f16_v16i64:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    ext v2.16b, v0.16b, v0.16b, #8
-; CHECK-GI-NOFP16-NEXT:    ext v3.16b, v1.16b, v1.16b, #8
-; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    fcvt s5, h0
-; CHECK-GI-NOFP16-NEXT:    mov h18, v0.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h2
-; CHECK-GI-NOFP16-NEXT:    mov h7, v2.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h16, v2.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h17, v3.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s19, h3
-; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x8, s5
-; CHECK-GI-NOFP16-NEXT:    mov h5, v1.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h2, v2.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s18, h18
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x9, s6
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h7
-; CHECK-GI-NOFP16-NEXT:    fcvt s7, h16
-; CHECK-GI-NOFP16-NEXT:    mov h16, v1.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s17, h17
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x10, s19
-; CHECK-GI-NOFP16-NEXT:    mov h19, v3.h[1]
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x11, s4
-; CHECK-GI-NOFP16-NEXT:    mov h4, v1.h[3]
-; CHECK-GI-NOFP16-NEXT:    mov h3, v3.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x13, s7
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x12, s6
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x15, s18
-; CHECK-GI-NOFP16-NEXT:    fcvt s7, h16
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x14, s17
-; CHECK-GI-NOFP16-NEXT:    fcvt s16, h2
-; CHECK-GI-NOFP16-NEXT:    fcvt s17, h19
-; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT:    fmov d2, x9
-; CHECK-GI-NOFP16-NEXT:    fcvt s19, h3
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x9, s1
-; CHECK-GI-NOFP16-NEXT:    fmov d6, x10
-; CHECK-GI-NOFP16-NEXT:    fmov d3, x13
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x13, s0
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x16, s5
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x10, s7
-; CHECK-GI-NOFP16-NEXT:    fmov d7, x14
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x14, s16
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x17, s17
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x0, s4
-; CHECK-GI-NOFP16-NEXT:    fmov d0, x8
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x18, s19
-; CHECK-GI-NOFP16-NEXT:    fmov d1, x15
-; CHECK-GI-NOFP16-NEXT:    fmov d4, x9
-; CHECK-GI-NOFP16-NEXT:    mov v2.d[1], x12
-; CHECK-GI-NOFP16-NEXT:    fmov d5, x10
-; CHECK-GI-NOFP16-NEXT:    mov v0.d[1], x11
-; CHECK-GI-NOFP16-NEXT:    mov v3.d[1], x14
-; CHECK-GI-NOFP16-NEXT:    mov v1.d[1], x13
-; CHECK-GI-NOFP16-NEXT:    mov v4.d[1], x16
-; CHECK-GI-NOFP16-NEXT:    mov v6.d[1], x17
-; CHECK-GI-NOFP16-NEXT:    mov v7.d[1], x18
-; CHECK-GI-NOFP16-NEXT:    mov v5.d[1], x0
+; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v3.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v4.2d, v2.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.2d, v2.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v5.2d, v0.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v6.2d, v0.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v7.2d, v3.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v16.2d, v3.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v17.2d, v1.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v18.2d, v1.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.2d, v4.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v1.2d, v2.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v2.2d, v5.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v3.2d, v6.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v4.2d, v7.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v5.2d, v16.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v6.2d, v17.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v7.2d, v18.2d
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v16f16_v16i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    ext v2.16b, v0.16b, v0.16b, #8
-; CHECK-GI-FP16-NEXT:    ext v3.16b, v1.16b, v1.16b, #8
-; CHECK-GI-FP16-NEXT:    mov h4, v0.h[1]
-; CHECK-GI-FP16-NEXT:    mov h5, v0.h[2]
-; CHECK-GI-FP16-NEXT:    fcvtzu x8, h0
-; CHECK-GI-FP16-NEXT:    mov h0, v0.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzu x9, h1
-; CHECK-GI-FP16-NEXT:    mov h7, v1.h[1]
-; CHECK-GI-FP16-NEXT:    mov h6, v2.h[2]
-; CHECK-GI-FP16-NEXT:    mov h16, v3.h[2]
-; CHECK-GI-FP16-NEXT:    fcvtzu x10, h4
-; CHECK-GI-FP16-NEXT:    mov h4, v1.h[2]
-; CHECK-GI-FP16-NEXT:    fcvtzu x11, h2
-; CHECK-GI-FP16-NEXT:    fcvtzu x12, h5
-; CHECK-GI-FP16-NEXT:    mov h5, v2.h[1]
-; CHECK-GI-FP16-NEXT:    mov h17, v2.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzu x13, h3
-; CHECK-GI-FP16-NEXT:    mov h18, v3.h[1]
-; CHECK-GI-FP16-NEXT:    mov h1, v1.h[3]
-; CHECK-GI-FP16-NEXT:    mov h19, v3.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzu x14, h6
-; CHECK-GI-FP16-NEXT:    fcvtzu x15, h16
-; CHECK-GI-FP16-NEXT:    fcvtzu x16, h0
-; CHECK-GI-FP16-NEXT:    fcvtzu x0, h4
-; CHECK-GI-FP16-NEXT:    fcvtzu x17, h7
-; CHECK-GI-FP16-NEXT:    fmov d2, x11
-; CHECK-GI-FP16-NEXT:    fcvtzu x11, h5
-; CHECK-GI-FP16-NEXT:    fcvtzu x18, h17
-; CHECK-GI-FP16-NEXT:    fmov d6, x13
-; CHECK-GI-FP16-NEXT:    fcvtzu x13, h18
-; CHECK-GI-FP16-NEXT:    fmov d0, x8
-; CHECK-GI-FP16-NEXT:    fmov d4, x9
-; CHECK-GI-FP16-NEXT:    fmov d3, x14
-; CHECK-GI-FP16-NEXT:    fmov d7, x15
-; CHECK-GI-FP16-NEXT:    fcvtzu x14, h19
-; CHECK-GI-FP16-NEXT:    fcvtzu x15, h1
-; CHECK-GI-FP16-NEXT:    fmov d1, x12
-; CHECK-GI-FP16-NEXT:    fmov d5, x0
-; CHECK-GI-FP16-NEXT:    mov v0.d[1], x10
-; CHECK-GI-FP16-NEXT:    mov v4.d[1], x17
-; CHECK-GI-FP16-NEXT:    mov v2.d[1], x11
-; CHECK-GI-FP16-NEXT:    mov v3.d[1], x18
-; CHECK-GI-FP16-NEXT:    mov v6.d[1], x13
-; CHECK-GI-FP16-NEXT:    mov v1.d[1], x16
-; CHECK-GI-FP16-NEXT:    mov v7.d[1], x14
-; CHECK-GI-FP16-NEXT:    mov v5.d[1], x15
+; CHECK-GI-FP16-NEXT:    mov h3, v0.h[1]
+; CHECK-GI-FP16-NEXT:    mov h4, v0.h[2]
+; CHECK-GI-FP16-NEXT:    mov h5, v0.h[3]
+; CHECK-GI-FP16-NEXT:    fcvt d2, h0
+; CHECK-GI-FP16-NEXT:    mov h6, v0.h[4]
+; CHECK-GI-FP16-NEXT:    mov h7, v0.h[5]
+; CHECK-GI-FP16-NEXT:    mov h16, v0.h[6]
+; CHECK-GI-FP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-FP16-NEXT:    mov h17, v1.h[1]
+; CHECK-GI-FP16-NEXT:    mov h18, v1.h[2]
+; CHECK-GI-FP16-NEXT:    mov h19, v1.h[3]
+; CHECK-GI-FP16-NEXT:    mov h20, v1.h[4]
+; CHECK-GI-FP16-NEXT:    mov h21, v1.h[5]
+; CHECK-GI-FP16-NEXT:    mov h22, v1.h[6]
+; CHECK-GI-FP16-NEXT:    mov h23, v1.h[7]
+; CHECK-GI-FP16-NEXT:    fcvt d3, h3
+; CHECK-GI-FP16-NEXT:    fcvt d4, h4
+; CHECK-GI-FP16-NEXT:    fcvt d5, h5
+; CHECK-GI-FP16-NEXT:    fcvt d6, h6
+; CHECK-GI-FP16-NEXT:    fcvt d7, h7
+; CHECK-GI-FP16-NEXT:    fcvt d16, h16
+; CHECK-GI-FP16-NEXT:    fcvt d0, h0
+; CHECK-GI-FP16-NEXT:    fcvt d24, h1
+; CHECK-GI-FP16-NEXT:    fcvt d1, h17
+; CHECK-GI-FP16-NEXT:    fcvt d17, h18
+; CHECK-GI-FP16-NEXT:    fcvt d18, h19
+; CHECK-GI-FP16-NEXT:    fcvt d19, h20
+; CHECK-GI-FP16-NEXT:    fcvt d20, h21
+; CHECK-GI-FP16-NEXT:    fcvt d21, h22
+; CHECK-GI-FP16-NEXT:    fcvt d22, h23
+; CHECK-GI-FP16-NEXT:    mov v2.d[1], v3.d[0]
+; CHECK-GI-FP16-NEXT:    mov v4.d[1], v5.d[0]
+; CHECK-GI-FP16-NEXT:    mov v6.d[1], v7.d[0]
+; CHECK-GI-FP16-NEXT:    mov v16.d[1], v0.d[0]
+; CHECK-GI-FP16-NEXT:    mov v24.d[1], v1.d[0]
+; CHECK-GI-FP16-NEXT:    mov v17.d[1], v18.d[0]
+; CHECK-GI-FP16-NEXT:    mov v19.d[1], v20.d[0]
+; CHECK-GI-FP16-NEXT:    mov v21.d[1], v22.d[0]
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.2d, v2.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v1.2d, v4.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v2.2d, v6.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v3.2d, v16.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v4.2d, v24.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v5.2d, v17.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v6.2d, v19.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v7.2d, v21.2d
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui <16 x half> %a to <16 x i64>
@@ -3676,242 +4428,158 @@ define <32 x i64> @fptos_v32f16_v32i64(<32 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v32f16_v32i64:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
-; CHECK-GI-NOFP16-NEXT:    ext v5.16b, v2.16b, v2.16b, #8
-; CHECK-GI-NOFP16-NEXT:    ext v6.16b, v3.16b, v3.16b, #8
-; CHECK-GI-NOFP16-NEXT:    ext v7.16b, v0.16b, v0.16b, #8
-; CHECK-GI-NOFP16-NEXT:    fcvt s21, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt s22, h2
-; CHECK-GI-NOFP16-NEXT:    mov h26, v2.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s19, h0
-; CHECK-GI-NOFP16-NEXT:    mov h27, v3.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h20, v2.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h18, v1.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h16, v4.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h17, v5.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s23, h5
-; CHECK-GI-NOFP16-NEXT:    fcvt s24, h6
-; CHECK-GI-NOFP16-NEXT:    mov h25, v6.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x9, s21
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x11, s22
-; CHECK-GI-NOFP16-NEXT:    fcvt s22, h7
-; CHECK-GI-NOFP16-NEXT:    mov h21, v3.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x10, s19
-; CHECK-GI-NOFP16-NEXT:    fcvt s27, h27
-; CHECK-GI-NOFP16-NEXT:    fcvt s20, h20
-; CHECK-GI-NOFP16-NEXT:    fcvt s16, h16
-; CHECK-GI-NOFP16-NEXT:    fcvt s17, h17
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x12, s23
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x13, s24
-; CHECK-GI-NOFP16-NEXT:    fcvt s23, h25
-; CHECK-GI-NOFP16-NEXT:    fcvt s25, h26
-; CHECK-GI-NOFP16-NEXT:    mov h26, v3.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h24, v2.h[3]
-; CHECK-GI-NOFP16-NEXT:    fmov d19, x9
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x9, s22
-; CHECK-GI-NOFP16-NEXT:    fcvt s22, h3
-; CHECK-GI-NOFP16-NEXT:    fcvt s21, h21
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x14, s16
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x15, s17
-; CHECK-GI-NOFP16-NEXT:    fmov d2, x12
-; CHECK-GI-NOFP16-NEXT:    fmov d16, x13
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x12, s23
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x13, s25
-; CHECK-GI-NOFP16-NEXT:    mov h23, v1.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s25, h26
-; CHECK-GI-NOFP16-NEXT:    fcvt s24, h24
-; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[3]
-; CHECK-GI-NOFP16-NEXT:    fmov d26, x11
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x11, s21
-; CHECK-GI-NOFP16-NEXT:    fmov d3, x14
-; CHECK-GI-NOFP16-NEXT:    fmov d17, x15
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x14, s22
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x15, s27
-; CHECK-GI-NOFP16-NEXT:    mov h22, v0.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s18, h18
-; CHECK-GI-NOFP16-NEXT:    fcvt s21, h23
-; CHECK-GI-NOFP16-NEXT:    fmov d23, x13
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x13, s25
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    fmov d25, x14
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x14, s24
-; CHECK-GI-NOFP16-NEXT:    fmov d24, x15
-; CHECK-GI-NOFP16-NEXT:    fcvt s22, h22
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x15, s18
-; CHECK-GI-NOFP16-NEXT:    mov h18, v7.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov v25.d[1], x13
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x13, s21
-; CHECK-GI-NOFP16-NEXT:    mov h21, v7.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov v24.d[1], x11
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x11, s20
-; CHECK-GI-NOFP16-NEXT:    mov h20, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[3]
-; CHECK-GI-NOFP16-NEXT:    mov v23.d[1], x14
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x14, s1
-; CHECK-GI-NOFP16-NEXT:    mov h1, v6.h[3]
-; CHECK-GI-NOFP16-NEXT:    mov h6, v6.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov v19.d[1], x15
-; CHECK-GI-NOFP16-NEXT:    mov h7, v7.h[3]
-; CHECK-GI-NOFP16-NEXT:    stp q25, q24, [x8, #192]
-; CHECK-GI-NOFP16-NEXT:    fmov d24, x13
-; CHECK-GI-NOFP16-NEXT:    fcvt s20, h20
-; CHECK-GI-NOFP16-NEXT:    mov v26.d[1], x11
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x11, s22
-; CHECK-GI-NOFP16-NEXT:    mov h22, v5.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h5, v5.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    mov v24.d[1], x14
-; CHECK-GI-NOFP16-NEXT:    mov h25, v4.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT:    stp q26, q23, [x8, #128]
-; CHECK-GI-NOFP16-NEXT:    fmov d23, x12
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x12, s20
-; CHECK-GI-NOFP16-NEXT:    mov h20, v4.h[1]
-; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x13, s0
-; CHECK-GI-NOFP16-NEXT:    stp q19, q24, [x8, #64]
-; CHECK-GI-NOFP16-NEXT:    fcvt s22, h22
-; CHECK-GI-NOFP16-NEXT:    fmov d0, x10
-; CHECK-GI-NOFP16-NEXT:    fmov d19, x11
-; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x10, s1
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h21
-; CHECK-GI-NOFP16-NEXT:    fcvt s24, h25
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x11, s6
-; CHECK-GI-NOFP16-NEXT:    fcvt s20, h20
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h7
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x14, s5
-; CHECK-GI-NOFP16-NEXT:    mov v19.d[1], x13
-; CHECK-GI-NOFP16-NEXT:    fcvt s5, h18
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x13, s22
-; CHECK-GI-NOFP16-NEXT:    mov v0.d[1], x12
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x12, s4
-; CHECK-GI-NOFP16-NEXT:    mov v23.d[1], x10
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x10, s1
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x15, s24
-; CHECK-GI-NOFP16-NEXT:    mov v16.d[1], x11
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x11, s20
-; CHECK-GI-NOFP16-NEXT:    mov v17.d[1], x14
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x14, s6
-; CHECK-GI-NOFP16-NEXT:    mov v2.d[1], x13
-; CHECK-GI-NOFP16-NEXT:    fcvtzs x13, s5
-; CHECK-GI-NOFP16-NEXT:    fmov d4, x9
-; CHECK-GI-NOFP16-NEXT:    stp q0, q19, [x8]
-; CHECK-GI-NOFP16-NEXT:    fmov d0, x12
-; CHECK-GI-NOFP16-NEXT:    stp q16, q23, [x8, #224]
-; CHECK-GI-NOFP16-NEXT:    fmov d1, x10
-; CHECK-GI-NOFP16-NEXT:    mov v3.d[1], x15
-; CHECK-GI-NOFP16-NEXT:    stp q2, q17, [x8, #160]
-; CHECK-GI-NOFP16-NEXT:    mov v0.d[1], x11
-; CHECK-GI-NOFP16-NEXT:    mov v4.d[1], x13
-; CHECK-GI-NOFP16-NEXT:    mov v1.d[1], x14
-; CHECK-GI-NOFP16-NEXT:    stp q0, q3, [x8, #96]
-; CHECK-GI-NOFP16-NEXT:    stp q4, q1, [x8, #32]
+; CHECK-GI-NOFP16-NEXT:    fcvtl v4.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v5.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v16.4s, v2.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.4s, v2.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v18.4s, v3.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v3.4s, v3.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v6.2d, v4.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v4.2d, v4.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v7.2d, v0.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.2d, v0.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v17.2d, v5.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v5.2d, v5.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v19.2d, v1.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.2d, v1.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v20.2d, v16.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v17.2d, v17.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NOFP16-NEXT:    stp q6, q4, [x8]
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v6.2d, v16.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v16.2d, v19.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtl v4.2d, v2.2s
+; CHECK-GI-NOFP16-NEXT:    stp q7, q0, [x8, #32]
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.2d, v2.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v0.2d, v18.2s
+; CHECK-GI-NOFP16-NEXT:    stp q17, q5, [x8, #64]
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v5.2d, v18.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v17.2d, v3.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v7.2d, v20.2d
+; CHECK-GI-NOFP16-NEXT:    stp q16, q1, [x8, #96]
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.2d, v3.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v3.2d, v5.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v5.2d, v17.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-NOFP16-NEXT:    stp q7, q6, [x8, #128]
+; CHECK-GI-NOFP16-NEXT:    stp q4, q2, [x8, #160]
+; CHECK-GI-NOFP16-NEXT:    stp q0, q3, [x8, #192]
+; CHECK-GI-NOFP16-NEXT:    stp q5, q1, [x8, #224]
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v32f16_v32i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
-; CHECK-GI-FP16-NEXT:    ext v5.16b, v2.16b, v2.16b, #8
-; CHECK-GI-FP16-NEXT:    ext v6.16b, v3.16b, v3.16b, #8
-; CHECK-GI-FP16-NEXT:    mov h16, v3.h[2]
-; CHECK-GI-FP16-NEXT:    fcvtzs x9, h0
-; CHECK-GI-FP16-NEXT:    mov h23, v3.h[3]
-; CHECK-GI-FP16-NEXT:    mov h25, v3.h[1]
-; CHECK-GI-FP16-NEXT:    fcvtzs x15, h3
-; CHECK-GI-FP16-NEXT:    mov h24, v2.h[2]
-; CHECK-GI-FP16-NEXT:    mov h19, v1.h[2]
-; CHECK-GI-FP16-NEXT:    mov h21, v2.h[1]
-; CHECK-GI-FP16-NEXT:    mov h26, v2.h[3]
-; CHECK-GI-FP16-NEXT:    mov h17, v4.h[2]
-; CHECK-GI-FP16-NEXT:    mov h18, v5.h[2]
-; CHECK-GI-FP16-NEXT:    mov h22, v6.h[2]
-; CHECK-GI-FP16-NEXT:    fcvtzs x10, h5
-; CHECK-GI-FP16-NEXT:    fcvtzs x12, h16
-; CHECK-GI-FP16-NEXT:    fcvtzs x11, h6
-; CHECK-GI-FP16-NEXT:    mov h7, v1.h[1]
-; CHECK-GI-FP16-NEXT:    mov h20, v1.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzs x13, h17
-; CHECK-GI-FP16-NEXT:    fcvtzs x14, h18
-; CHECK-GI-FP16-NEXT:    fmov d18, x9
-; CHECK-GI-FP16-NEXT:    fcvtzs x9, h22
-; CHECK-GI-FP16-NEXT:    fmov d3, x10
-; CHECK-GI-FP16-NEXT:    fcvtzs x10, h23
-; CHECK-GI-FP16-NEXT:    fmov d22, x12
-; CHECK-GI-FP16-NEXT:    fcvtzs x12, h25
-; CHECK-GI-FP16-NEXT:    fmov d23, x15
-; CHECK-GI-FP16-NEXT:    fmov d16, x11
-; CHECK-GI-FP16-NEXT:    fcvtzs x11, h2
-; CHECK-GI-FP16-NEXT:    fcvtzs x15, h21
-; CHECK-GI-FP16-NEXT:    fmov d2, x13
-; CHECK-GI-FP16-NEXT:    fcvtzs x13, h24
-; CHECK-GI-FP16-NEXT:    fmov d17, x14
-; CHECK-GI-FP16-NEXT:    fcvtzs x14, h19
-; CHECK-GI-FP16-NEXT:    mov v22.d[1], x10
-; CHECK-GI-FP16-NEXT:    fcvtzs x10, h1
-; CHECK-GI-FP16-NEXT:    mov v23.d[1], x12
-; CHECK-GI-FP16-NEXT:    fmov d19, x9
-; CHECK-GI-FP16-NEXT:    fcvtzs x9, h26
-; CHECK-GI-FP16-NEXT:    fcvtzs x12, h20
-; CHECK-GI-FP16-NEXT:    mov h20, v0.h[2]
-; CHECK-GI-FP16-NEXT:    fmov d21, x11
-; CHECK-GI-FP16-NEXT:    fmov d1, x13
-; CHECK-GI-FP16-NEXT:    fcvtzs x13, h7
-; CHECK-GI-FP16-NEXT:    mov h24, v0.h[3]
-; CHECK-GI-FP16-NEXT:    fmov d7, x14
-; CHECK-GI-FP16-NEXT:    stp q23, q22, [x8, #192]
-; CHECK-GI-FP16-NEXT:    fmov d22, x10
-; CHECK-GI-FP16-NEXT:    mov v21.d[1], x15
-; CHECK-GI-FP16-NEXT:    mov v1.d[1], x9
-; CHECK-GI-FP16-NEXT:    mov h23, v0.h[1]
-; CHECK-GI-FP16-NEXT:    fcvtzs x9, h20
-; CHECK-GI-FP16-NEXT:    mov v7.d[1], x12
-; CHECK-GI-FP16-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-GI-FP16-NEXT:    mov h20, v6.h[3]
-; CHECK-GI-FP16-NEXT:    mov v22.d[1], x13
-; CHECK-GI-FP16-NEXT:    mov h6, v6.h[1]
-; CHECK-GI-FP16-NEXT:    fcvtzs x10, h24
-; CHECK-GI-FP16-NEXT:    stp q21, q1, [x8, #128]
-; CHECK-GI-FP16-NEXT:    mov h1, v5.h[1]
-; CHECK-GI-FP16-NEXT:    mov h5, v5.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzs x12, h20
-; CHECK-GI-FP16-NEXT:    mov h20, v0.h[2]
-; CHECK-GI-FP16-NEXT:    fcvtzs x11, h0
-; CHECK-GI-FP16-NEXT:    stp q22, q7, [x8, #64]
-; CHECK-GI-FP16-NEXT:    fmov d7, x9
-; CHECK-GI-FP16-NEXT:    fcvtzs x9, h23
-; CHECK-GI-FP16-NEXT:    mov h21, v4.h[3]
-; CHECK-GI-FP16-NEXT:    mov h22, v4.h[1]
-; CHECK-GI-FP16-NEXT:    fcvtzs x13, h6
+; CHECK-GI-FP16-NEXT:    mov h4, v0.h[1]
+; CHECK-GI-FP16-NEXT:    mov h5, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov h6, v0.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzs x14, h5
-; CHECK-GI-FP16-NEXT:    mov h0, v0.h[1]
-; CHECK-GI-FP16-NEXT:    mov v7.d[1], x10
-; CHECK-GI-FP16-NEXT:    fcvtzs x10, h1
-; CHECK-GI-FP16-NEXT:    mov v19.d[1], x12
-; CHECK-GI-FP16-NEXT:    mov v18.d[1], x9
-; CHECK-GI-FP16-NEXT:    fcvtzs x9, h4
-; CHECK-GI-FP16-NEXT:    fcvtzs x12, h20
-; CHECK-GI-FP16-NEXT:    fcvtzs x15, h21
-; CHECK-GI-FP16-NEXT:    mov v16.d[1], x13
-; CHECK-GI-FP16-NEXT:    fcvtzs x13, h22
-; CHECK-GI-FP16-NEXT:    mov v17.d[1], x14
-; CHECK-GI-FP16-NEXT:    fcvtzs x14, h6
-; CHECK-GI-FP16-NEXT:    fmov d4, x11
-; CHECK-GI-FP16-NEXT:    mov v3.d[1], x10
-; CHECK-GI-FP16-NEXT:    fcvtzs x10, h0
-; CHECK-GI-FP16-NEXT:    stp q18, q7, [x8]
-; CHECK-GI-FP16-NEXT:    fmov d0, x9
-; CHECK-GI-FP16-NEXT:    fmov d1, x12
-; CHECK-GI-FP16-NEXT:    stp q16, q19, [x8, #224]
-; CHECK-GI-FP16-NEXT:    mov v2.d[1], x15
-; CHECK-GI-FP16-NEXT:    stp q3, q17, [x8, #160]
-; CHECK-GI-FP16-NEXT:    mov v0.d[1], x13
-; CHECK-GI-FP16-NEXT:    mov v1.d[1], x14
-; CHECK-GI-FP16-NEXT:    mov v4.d[1], x10
-; CHECK-GI-FP16-NEXT:    stp q0, q2, [x8, #96]
-; CHECK-GI-FP16-NEXT:    stp q4, q1, [x8, #32]
+; CHECK-GI-FP16-NEXT:    mov h17, v0.h[4]
+; CHECK-GI-FP16-NEXT:    mov h18, v0.h[5]
+; CHECK-GI-FP16-NEXT:    mov h19, v0.h[6]
+; CHECK-GI-FP16-NEXT:    mov h20, v0.h[7]
+; CHECK-GI-FP16-NEXT:    mov h21, v1.h[1]
+; CHECK-GI-FP16-NEXT:    fcvt d16, h0
+; CHECK-GI-FP16-NEXT:    fcvt d0, h1
+; CHECK-GI-FP16-NEXT:    mov h23, v2.h[2]
+; CHECK-GI-FP16-NEXT:    mov h24, v2.h[3]
+; CHECK-GI-FP16-NEXT:    fcvt d4, h4
+; CHECK-GI-FP16-NEXT:    fcvt d7, h5
+; CHECK-GI-FP16-NEXT:    fcvt d22, h6
+; CHECK-GI-FP16-NEXT:    fcvt d6, h17
+; CHECK-GI-FP16-NEXT:    fcvt d17, h18
+; CHECK-GI-FP16-NEXT:    fcvt d5, h19
+; CHECK-GI-FP16-NEXT:    fcvt d18, h20
+; CHECK-GI-FP16-NEXT:    fcvt d19, h21
+; CHECK-GI-FP16-NEXT:    mov h20, v1.h[2]
+; CHECK-GI-FP16-NEXT:    mov h21, v1.h[3]
+; CHECK-GI-FP16-NEXT:    mov h25, v2.h[4]
+; CHECK-GI-FP16-NEXT:    mov h26, v2.h[5]
+; CHECK-GI-FP16-NEXT:    mov v16.d[1], v4.d[0]
+; CHECK-GI-FP16-NEXT:    mov v7.d[1], v22.d[0]
+; CHECK-GI-FP16-NEXT:    mov h22, v2.h[1]
+; CHECK-GI-FP16-NEXT:    mov v6.d[1], v17.d[0]
+; CHECK-GI-FP16-NEXT:    mov h17, v1.h[4]
+; CHECK-GI-FP16-NEXT:    mov h27, v2.h[6]
+; CHECK-GI-FP16-NEXT:    mov v5.d[1], v18.d[0]
+; CHECK-GI-FP16-NEXT:    mov v0.d[1], v19.d[0]
+; CHECK-GI-FP16-NEXT:    fcvt d4, h20
+; CHECK-GI-FP16-NEXT:    mov h18, v1.h[5]
+; CHECK-GI-FP16-NEXT:    mov h19, v1.h[6]
+; CHECK-GI-FP16-NEXT:    mov h20, v1.h[7]
+; CHECK-GI-FP16-NEXT:    fcvt d21, h21
+; CHECK-GI-FP16-NEXT:    mov h28, v2.h[7]
+; CHECK-GI-FP16-NEXT:    fcvt d22, h22
+; CHECK-GI-FP16-NEXT:    fcvt d1, h17
+; CHECK-GI-FP16-NEXT:    fcvt d17, h23
+; CHECK-GI-FP16-NEXT:    fcvt d23, h24
+; CHECK-GI-FP16-NEXT:    fcvtzs v16.2d, v16.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-GI-FP16-NEXT:    fcvt d29, h18
+; CHECK-GI-FP16-NEXT:    fcvt d19, h19
+; CHECK-GI-FP16-NEXT:    fcvt d30, h20
+; CHECK-GI-FP16-NEXT:    fcvt d20, h2
+; CHECK-GI-FP16-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-GI-FP16-NEXT:    fcvt d18, h25
+; CHECK-GI-FP16-NEXT:    fcvt d24, h26
+; CHECK-GI-FP16-NEXT:    fcvt d2, h27
+; CHECK-GI-FP16-NEXT:    fcvt d25, h28
+; CHECK-GI-FP16-NEXT:    stp q16, q7, [x8]
+; CHECK-GI-FP16-NEXT:    mov v4.d[1], v21.d[0]
+; CHECK-GI-FP16-NEXT:    mov v17.d[1], v23.d[0]
+; CHECK-GI-FP16-NEXT:    mov v1.d[1], v29.d[0]
+; CHECK-GI-FP16-NEXT:    mov v19.d[1], v30.d[0]
+; CHECK-GI-FP16-NEXT:    mov h21, v3.h[1]
+; CHECK-GI-FP16-NEXT:    stp q6, q5, [x8, #32]
+; CHECK-GI-FP16-NEXT:    mov v20.d[1], v22.d[0]
+; CHECK-GI-FP16-NEXT:    mov h16, v3.h[2]
+; CHECK-GI-FP16-NEXT:    mov h7, v3.h[3]
+; CHECK-GI-FP16-NEXT:    mov h22, v3.h[4]
+; CHECK-GI-FP16-NEXT:    mov h23, v3.h[5]
+; CHECK-GI-FP16-NEXT:    mov h6, v3.h[6]
+; CHECK-GI-FP16-NEXT:    mov h5, v3.h[7]
+; CHECK-GI-FP16-NEXT:    mov v18.d[1], v24.d[0]
+; CHECK-GI-FP16-NEXT:    mov v2.d[1], v25.d[0]
+; CHECK-GI-FP16-NEXT:    fcvt d3, h3
+; CHECK-GI-FP16-NEXT:    fcvt d21, h21
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-FP16-NEXT:    fcvt d16, h16
+; CHECK-GI-FP16-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-GI-FP16-NEXT:    fcvt d7, h7
+; CHECK-GI-FP16-NEXT:    fcvt d22, h22
+; CHECK-GI-FP16-NEXT:    fcvt d23, h23
+; CHECK-GI-FP16-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-GI-FP16-NEXT:    fcvt d6, h6
+; CHECK-GI-FP16-NEXT:    fcvt d5, h5
+; CHECK-GI-FP16-NEXT:    fcvtzs v19.2d, v19.2d
+; CHECK-GI-FP16-NEXT:    mov v3.d[1], v21.d[0]
+; CHECK-GI-FP16-NEXT:    fcvtzs v20.2d, v20.2d
+; CHECK-GI-FP16-NEXT:    stp q0, q4, [x8, #64]
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.2d, v17.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v4.2d, v18.2d
+; CHECK-GI-FP16-NEXT:    mov v16.d[1], v7.d[0]
+; CHECK-GI-FP16-NEXT:    mov v22.d[1], v23.d[0]
+; CHECK-GI-FP16-NEXT:    mov v6.d[1], v5.d[0]
+; CHECK-GI-FP16-NEXT:    stp q1, q19, [x8, #96]
+; CHECK-GI-FP16-NEXT:    fcvtzs v1.2d, v2.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v2.2d, v3.2d
+; CHECK-GI-FP16-NEXT:    stp q20, q0, [x8, #128]
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.2d, v16.2d
+; CHECK-GI-FP16-NEXT:    fcvtzs v3.2d, v22.2d
+; CHECK-GI-FP16-NEXT:    stp q4, q1, [x8, #160]
+; CHECK-GI-FP16-NEXT:    fcvtzs v1.2d, v6.2d
+; CHECK-GI-FP16-NEXT:    stp q2, q0, [x8, #192]
+; CHECK-GI-FP16-NEXT:    stp q3, q1, [x8, #224]
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi <32 x half> %a to <32 x i64>
@@ -4161,242 +4829,158 @@ define <32 x i64> @fptou_v32f16_v32i64(<32 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v32f16_v32i64:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
-; CHECK-GI-NOFP16-NEXT:    ext v5.16b, v2.16b, v2.16b, #8
-; CHECK-GI-NOFP16-NEXT:    ext v6.16b, v3.16b, v3.16b, #8
-; CHECK-GI-NOFP16-NEXT:    ext v7.16b, v0.16b, v0.16b, #8
-; CHECK-GI-NOFP16-NEXT:    fcvt s21, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt s22, h2
-; CHECK-GI-NOFP16-NEXT:    mov h26, v2.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s19, h0
-; CHECK-GI-NOFP16-NEXT:    mov h27, v3.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h20, v2.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h18, v1.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h16, v4.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h17, v5.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s23, h5
-; CHECK-GI-NOFP16-NEXT:    fcvt s24, h6
-; CHECK-GI-NOFP16-NEXT:    mov h25, v6.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x9, s21
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x11, s22
-; CHECK-GI-NOFP16-NEXT:    fcvt s22, h7
-; CHECK-GI-NOFP16-NEXT:    mov h21, v3.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x10, s19
-; CHECK-GI-NOFP16-NEXT:    fcvt s27, h27
-; CHECK-GI-NOFP16-NEXT:    fcvt s20, h20
-; CHECK-GI-NOFP16-NEXT:    fcvt s16, h16
-; CHECK-GI-NOFP16-NEXT:    fcvt s17, h17
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x12, s23
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x13, s24
-; CHECK-GI-NOFP16-NEXT:    fcvt s23, h25
-; CHECK-GI-NOFP16-NEXT:    fcvt s25, h26
-; CHECK-GI-NOFP16-NEXT:    mov h26, v3.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h24, v2.h[3]
-; CHECK-GI-NOFP16-NEXT:    fmov d19, x9
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x9, s22
-; CHECK-GI-NOFP16-NEXT:    fcvt s22, h3
-; CHECK-GI-NOFP16-NEXT:    fcvt s21, h21
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x14, s16
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x15, s17
-; CHECK-GI-NOFP16-NEXT:    fmov d2, x12
-; CHECK-GI-NOFP16-NEXT:    fmov d16, x13
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x12, s23
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x13, s25
-; CHECK-GI-NOFP16-NEXT:    mov h23, v1.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s25, h26
-; CHECK-GI-NOFP16-NEXT:    fcvt s24, h24
-; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[3]
-; CHECK-GI-NOFP16-NEXT:    fmov d26, x11
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x11, s21
-; CHECK-GI-NOFP16-NEXT:    fmov d3, x14
-; CHECK-GI-NOFP16-NEXT:    fmov d17, x15
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x14, s22
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x15, s27
-; CHECK-GI-NOFP16-NEXT:    mov h22, v0.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s18, h18
-; CHECK-GI-NOFP16-NEXT:    fcvt s21, h23
-; CHECK-GI-NOFP16-NEXT:    fmov d23, x13
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x13, s25
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    fmov d25, x14
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x14, s24
-; CHECK-GI-NOFP16-NEXT:    fmov d24, x15
-; CHECK-GI-NOFP16-NEXT:    fcvt s22, h22
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x15, s18
-; CHECK-GI-NOFP16-NEXT:    mov h18, v7.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov v25.d[1], x13
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x13, s21
-; CHECK-GI-NOFP16-NEXT:    mov h21, v7.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov v24.d[1], x11
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x11, s20
-; CHECK-GI-NOFP16-NEXT:    mov h20, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[3]
-; CHECK-GI-NOFP16-NEXT:    mov v23.d[1], x14
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x14, s1
-; CHECK-GI-NOFP16-NEXT:    mov h1, v6.h[3]
-; CHECK-GI-NOFP16-NEXT:    mov h6, v6.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov v19.d[1], x15
-; CHECK-GI-NOFP16-NEXT:    mov h7, v7.h[3]
-; CHECK-GI-NOFP16-NEXT:    stp q25, q24, [x8, #192]
-; CHECK-GI-NOFP16-NEXT:    fmov d24, x13
-; CHECK-GI-NOFP16-NEXT:    fcvt s20, h20
-; CHECK-GI-NOFP16-NEXT:    mov v26.d[1], x11
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x11, s22
-; CHECK-GI-NOFP16-NEXT:    mov h22, v5.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h5, v5.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    mov v24.d[1], x14
-; CHECK-GI-NOFP16-NEXT:    mov h25, v4.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT:    stp q26, q23, [x8, #128]
-; CHECK-GI-NOFP16-NEXT:    fmov d23, x12
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x12, s20
-; CHECK-GI-NOFP16-NEXT:    mov h20, v4.h[1]
-; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x13, s0
-; CHECK-GI-NOFP16-NEXT:    stp q19, q24, [x8, #64]
-; CHECK-GI-NOFP16-NEXT:    fcvt s22, h22
-; CHECK-GI-NOFP16-NEXT:    fmov d0, x10
-; CHECK-GI-NOFP16-NEXT:    fmov d19, x11
-; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x10, s1
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h21
-; CHECK-GI-NOFP16-NEXT:    fcvt s24, h25
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x11, s6
-; CHECK-GI-NOFP16-NEXT:    fcvt s20, h20
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h7
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x14, s5
-; CHECK-GI-NOFP16-NEXT:    mov v19.d[1], x13
-; CHECK-GI-NOFP16-NEXT:    fcvt s5, h18
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x13, s22
-; CHECK-GI-NOFP16-NEXT:    mov v0.d[1], x12
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x12, s4
-; CHECK-GI-NOFP16-NEXT:    mov v23.d[1], x10
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x10, s1
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x15, s24
-; CHECK-GI-NOFP16-NEXT:    mov v16.d[1], x11
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x11, s20
-; CHECK-GI-NOFP16-NEXT:    mov v17.d[1], x14
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x14, s6
-; CHECK-GI-NOFP16-NEXT:    mov v2.d[1], x13
-; CHECK-GI-NOFP16-NEXT:    fcvtzu x13, s5
-; CHECK-GI-NOFP16-NEXT:    fmov d4, x9
-; CHECK-GI-NOFP16-NEXT:    stp q0, q19, [x8]
-; CHECK-GI-NOFP16-NEXT:    fmov d0, x12
-; CHECK-GI-NOFP16-NEXT:    stp q16, q23, [x8, #224]
-; CHECK-GI-NOFP16-NEXT:    fmov d1, x10
-; CHECK-GI-NOFP16-NEXT:    mov v3.d[1], x15
-; CHECK-GI-NOFP16-NEXT:    stp q2, q17, [x8, #160]
-; CHECK-GI-NOFP16-NEXT:    mov v0.d[1], x11
-; CHECK-GI-NOFP16-NEXT:    mov v4.d[1], x13
-; CHECK-GI-NOFP16-NEXT:    mov v1.d[1], x14
-; CHECK-GI-NOFP16-NEXT:    stp q0, q3, [x8, #96]
-; CHECK-GI-NOFP16-NEXT:    stp q4, q1, [x8, #32]
+; CHECK-GI-NOFP16-NEXT:    fcvtl v4.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v5.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v16.4s, v2.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.4s, v2.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v18.4s, v3.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v3.4s, v3.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v6.2d, v4.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v4.2d, v4.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v7.2d, v0.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.2d, v0.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v17.2d, v5.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v5.2d, v5.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v19.2d, v1.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.2d, v1.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v20.2d, v16.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v6.2d, v6.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v4.2d, v4.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v7.2d, v7.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v17.2d, v17.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v5.2d, v5.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NOFP16-NEXT:    stp q6, q4, [x8]
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v6.2d, v16.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v16.2d, v19.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtl v4.2d, v2.2s
+; CHECK-GI-NOFP16-NEXT:    stp q7, q0, [x8, #32]
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.2d, v2.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v0.2d, v18.2s
+; CHECK-GI-NOFP16-NEXT:    stp q17, q5, [x8, #64]
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v5.2d, v18.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v17.2d, v3.2s
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v7.2d, v20.2d
+; CHECK-GI-NOFP16-NEXT:    stp q16, q1, [x8, #96]
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.2d, v3.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v6.2d, v6.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v4.2d, v4.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v3.2d, v5.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v5.2d, v17.2d
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-NOFP16-NEXT:    stp q7, q6, [x8, #128]
+; CHECK-GI-NOFP16-NEXT:    stp q4, q2, [x8, #160]
+; CHECK-GI-NOFP16-NEXT:    stp q0, q3, [x8, #192]
+; CHECK-GI-NOFP16-NEXT:    stp q5, q1, [x8, #224]
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v32f16_v32i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
-; CHECK-GI-FP16-NEXT:    ext v5.16b, v2.16b, v2.16b, #8
-; CHECK-GI-FP16-NEXT:    ext v6.16b, v3.16b, v3.16b, #8
-; CHECK-GI-FP16-NEXT:    mov h16, v3.h[2]
-; CHECK-GI-FP16-NEXT:    fcvtzu x9, h0
-; CHECK-GI-FP16-NEXT:    mov h23, v3.h[3]
-; CHECK-GI-FP16-NEXT:    mov h25, v3.h[1]
-; CHECK-GI-FP16-NEXT:    fcvtzu x15, h3
-; CHECK-GI-FP16-NEXT:    mov h24, v2.h[2]
-; CHECK-GI-FP16-NEXT:    mov h19, v1.h[2]
-; CHECK-GI-FP16-NEXT:    mov h21, v2.h[1]
-; CHECK-GI-FP16-NEXT:    mov h26, v2.h[3]
-; CHECK-GI-FP16-NEXT:    mov h17, v4.h[2]
-; CHECK-GI-FP16-NEXT:    mov h18, v5.h[2]
-; CHECK-GI-FP16-NEXT:    mov h22, v6.h[2]
-; CHECK-GI-FP16-NEXT:    fcvtzu x10, h5
-; CHECK-GI-FP16-NEXT:    fcvtzu x12, h16
-; CHECK-GI-FP16-NEXT:    fcvtzu x11, h6
-; CHECK-GI-FP16-NEXT:    mov h7, v1.h[1]
-; CHECK-GI-FP16-NEXT:    mov h20, v1.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzu x13, h17
-; CHECK-GI-FP16-NEXT:    fcvtzu x14, h18
-; CHECK-GI-FP16-NEXT:    fmov d18, x9
-; CHECK-GI-FP16-NEXT:    fcvtzu x9, h22
-; CHECK-GI-FP16-NEXT:    fmov d3, x10
-; CHECK-GI-FP16-NEXT:    fcvtzu x10, h23
-; CHECK-GI-FP16-NEXT:    fmov d22, x12
-; CHECK-GI-FP16-NEXT:    fcvtzu x12, h25
-; CHECK-GI-FP16-NEXT:    fmov d23, x15
-; CHECK-GI-FP16-NEXT:    fmov d16, x11
-; CHECK-GI-FP16-NEXT:    fcvtzu x11, h2
-; CHECK-GI-FP16-NEXT:    fcvtzu x15, h21
-; CHECK-GI-FP16-NEXT:    fmov d2, x13
-; CHECK-GI-FP16-NEXT:    fcvtzu x13, h24
-; CHECK-GI-FP16-NEXT:    fmov d17, x14
-; CHECK-GI-FP16-NEXT:    fcvtzu x14, h19
-; CHECK-GI-FP16-NEXT:    mov v22.d[1], x10
-; CHECK-GI-FP16-NEXT:    fcvtzu x10, h1
-; CHECK-GI-FP16-NEXT:    mov v23.d[1], x12
-; CHECK-GI-FP16-NEXT:    fmov d19, x9
-; CHECK-GI-FP16-NEXT:    fcvtzu x9, h26
-; CHECK-GI-FP16-NEXT:    fcvtzu x12, h20
-; CHECK-GI-FP16-NEXT:    mov h20, v0.h[2]
-; CHECK-GI-FP16-NEXT:    fmov d21, x11
-; CHECK-GI-FP16-NEXT:    fmov d1, x13
-; CHECK-GI-FP16-NEXT:    fcvtzu x13, h7
-; CHECK-GI-FP16-NEXT:    mov h24, v0.h[3]
-; CHECK-GI-FP16-NEXT:    fmov d7, x14
-; CHECK-GI-FP16-NEXT:    stp q23, q22, [x8, #192]
-; CHECK-GI-FP16-NEXT:    fmov d22, x10
-; CHECK-GI-FP16-NEXT:    mov v21.d[1], x15
-; CHECK-GI-FP16-NEXT:    mov v1.d[1], x9
-; CHECK-GI-FP16-NEXT:    mov h23, v0.h[1]
-; CHECK-GI-FP16-NEXT:    fcvtzu x9, h20
-; CHECK-GI-FP16-NEXT:    mov v7.d[1], x12
-; CHECK-GI-FP16-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-GI-FP16-NEXT:    mov h20, v6.h[3]
-; CHECK-GI-FP16-NEXT:    mov v22.d[1], x13
-; CHECK-GI-FP16-NEXT:    mov h6, v6.h[1]
-; CHECK-GI-FP16-NEXT:    fcvtzu x10, h24
-; CHECK-GI-FP16-NEXT:    stp q21, q1, [x8, #128]
-; CHECK-GI-FP16-NEXT:    mov h1, v5.h[1]
-; CHECK-GI-FP16-NEXT:    mov h5, v5.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzu x12, h20
-; CHECK-GI-FP16-NEXT:    mov h20, v0.h[2]
-; CHECK-GI-FP16-NEXT:    fcvtzu x11, h0
-; CHECK-GI-FP16-NEXT:    stp q22, q7, [x8, #64]
-; CHECK-GI-FP16-NEXT:    fmov d7, x9
-; CHECK-GI-FP16-NEXT:    fcvtzu x9, h23
-; CHECK-GI-FP16-NEXT:    mov h21, v4.h[3]
-; CHECK-GI-FP16-NEXT:    mov h22, v4.h[1]
-; CHECK-GI-FP16-NEXT:    fcvtzu x13, h6
+; CHECK-GI-FP16-NEXT:    mov h4, v0.h[1]
+; CHECK-GI-FP16-NEXT:    mov h5, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov h6, v0.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzu x14, h5
-; CHECK-GI-FP16-NEXT:    mov h0, v0.h[1]
-; CHECK-GI-FP16-NEXT:    mov v7.d[1], x10
-; CHECK-GI-FP16-NEXT:    fcvtzu x10, h1
-; CHECK-GI-FP16-NEXT:    mov v19.d[1], x12
-; CHECK-GI-FP16-NEXT:    mov v18.d[1], x9
-; CHECK-GI-FP16-NEXT:    fcvtzu x9, h4
-; CHECK-GI-FP16-NEXT:    fcvtzu x12, h20
-; CHECK-GI-FP16-NEXT:    fcvtzu x15, h21
-; CHECK-GI-FP16-NEXT:    mov v16.d[1], x13
-; CHECK-GI-FP16-NEXT:    fcvtzu x13, h22
-; CHECK-GI-FP16-NEXT:    mov v17.d[1], x14
-; CHECK-GI-FP16-NEXT:    fcvtzu x14, h6
-; CHECK-GI-FP16-NEXT:    fmov d4, x11
-; CHECK-GI-FP16-NEXT:    mov v3.d[1], x10
-; CHECK-GI-FP16-NEXT:    fcvtzu x10, h0
-; CHECK-GI-FP16-NEXT:    stp q18, q7, [x8]
-; CHECK-GI-FP16-NEXT:    fmov d0, x9
-; CHECK-GI-FP16-NEXT:    fmov d1, x12
-; CHECK-GI-FP16-NEXT:    stp q16, q19, [x8, #224]
-; CHECK-GI-FP16-NEXT:    mov v2.d[1], x15
-; CHECK-GI-FP16-NEXT:    stp q3, q17, [x8, #160]
-; CHECK-GI-FP16-NEXT:    mov v0.d[1], x13
-; CHECK-GI-FP16-NEXT:    mov v1.d[1], x14
-; CHECK-GI-FP16-NEXT:    mov v4.d[1], x10
-; CHECK-GI-FP16-NEXT:    stp q0, q2, [x8, #96]
-; CHECK-GI-FP16-NEXT:    stp q4, q1, [x8, #32]
+; CHECK-GI-FP16-NEXT:    mov h17, v0.h[4]
+; CHECK-GI-FP16-NEXT:    mov h18, v0.h[5]
+; CHECK-GI-FP16-NEXT:    mov h19, v0.h[6]
+; CHECK-GI-FP16-NEXT:    mov h20, v0.h[7]
+; CHECK-GI-FP16-NEXT:    mov h21, v1.h[1]
+; CHECK-GI-FP16-NEXT:    fcvt d16, h0
+; CHECK-GI-FP16-NEXT:    fcvt d0, h1
+; CHECK-GI-FP16-NEXT:    mov h23, v2.h[2]
+; CHECK-GI-FP16-NEXT:    mov h24, v2.h[3]
+; CHECK-GI-FP16-NEXT:    fcvt d4, h4
+; CHECK-GI-FP16-NEXT:    fcvt d7, h5
+; CHECK-GI-FP16-NEXT:    fcvt d22, h6
+; CHECK-GI-FP16-NEXT:    fcvt d6, h17
+; CHECK-GI-FP16-NEXT:    fcvt d17, h18
+; CHECK-GI-FP16-NEXT:    fcvt d5, h19
+; CHECK-GI-FP16-NEXT:    fcvt d18, h20
+; CHECK-GI-FP16-NEXT:    fcvt d19, h21
+; CHECK-GI-FP16-NEXT:    mov h20, v1.h[2]
+; CHECK-GI-FP16-NEXT:    mov h21, v1.h[3]
+; CHECK-GI-FP16-NEXT:    mov h25, v2.h[4]
+; CHECK-GI-FP16-NEXT:    mov h26, v2.h[5]
+; CHECK-GI-FP16-NEXT:    mov v16.d[1], v4.d[0]
+; CHECK-GI-FP16-NEXT:    mov v7.d[1], v22.d[0]
+; CHECK-GI-FP16-NEXT:    mov h22, v2.h[1]
+; CHECK-GI-FP16-NEXT:    mov v6.d[1], v17.d[0]
+; CHECK-GI-FP16-NEXT:    mov h17, v1.h[4]
+; CHECK-GI-FP16-NEXT:    mov h27, v2.h[6]
+; CHECK-GI-FP16-NEXT:    mov v5.d[1], v18.d[0]
+; CHECK-GI-FP16-NEXT:    mov v0.d[1], v19.d[0]
+; CHECK-GI-FP16-NEXT:    fcvt d4, h20
+; CHECK-GI-FP16-NEXT:    mov h18, v1.h[5]
+; CHECK-GI-FP16-NEXT:    mov h19, v1.h[6]
+; CHECK-GI-FP16-NEXT:    mov h20, v1.h[7]
+; CHECK-GI-FP16-NEXT:    fcvt d21, h21
+; CHECK-GI-FP16-NEXT:    mov h28, v2.h[7]
+; CHECK-GI-FP16-NEXT:    fcvt d22, h22
+; CHECK-GI-FP16-NEXT:    fcvt d1, h17
+; CHECK-GI-FP16-NEXT:    fcvt d17, h23
+; CHECK-GI-FP16-NEXT:    fcvt d23, h24
+; CHECK-GI-FP16-NEXT:    fcvtzu v16.2d, v16.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v7.2d, v7.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v6.2d, v6.2d
+; CHECK-GI-FP16-NEXT:    fcvt d29, h18
+; CHECK-GI-FP16-NEXT:    fcvt d19, h19
+; CHECK-GI-FP16-NEXT:    fcvt d30, h20
+; CHECK-GI-FP16-NEXT:    fcvt d20, h2
+; CHECK-GI-FP16-NEXT:    fcvtzu v5.2d, v5.2d
+; CHECK-GI-FP16-NEXT:    fcvt d18, h25
+; CHECK-GI-FP16-NEXT:    fcvt d24, h26
+; CHECK-GI-FP16-NEXT:    fcvt d2, h27
+; CHECK-GI-FP16-NEXT:    fcvt d25, h28
+; CHECK-GI-FP16-NEXT:    stp q16, q7, [x8]
+; CHECK-GI-FP16-NEXT:    mov v4.d[1], v21.d[0]
+; CHECK-GI-FP16-NEXT:    mov v17.d[1], v23.d[0]
+; CHECK-GI-FP16-NEXT:    mov v1.d[1], v29.d[0]
+; CHECK-GI-FP16-NEXT:    mov v19.d[1], v30.d[0]
+; CHECK-GI-FP16-NEXT:    mov h21, v3.h[1]
+; CHECK-GI-FP16-NEXT:    stp q6, q5, [x8, #32]
+; CHECK-GI-FP16-NEXT:    mov v20.d[1], v22.d[0]
+; CHECK-GI-FP16-NEXT:    mov h16, v3.h[2]
+; CHECK-GI-FP16-NEXT:    mov h7, v3.h[3]
+; CHECK-GI-FP16-NEXT:    mov h22, v3.h[4]
+; CHECK-GI-FP16-NEXT:    mov h23, v3.h[5]
+; CHECK-GI-FP16-NEXT:    mov h6, v3.h[6]
+; CHECK-GI-FP16-NEXT:    mov h5, v3.h[7]
+; CHECK-GI-FP16-NEXT:    mov v18.d[1], v24.d[0]
+; CHECK-GI-FP16-NEXT:    mov v2.d[1], v25.d[0]
+; CHECK-GI-FP16-NEXT:    fcvt d3, h3
+; CHECK-GI-FP16-NEXT:    fcvt d21, h21
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-GI-FP16-NEXT:    fcvt d16, h16
+; CHECK-GI-FP16-NEXT:    fcvtzu v4.2d, v4.2d
+; CHECK-GI-FP16-NEXT:    fcvt d7, h7
+; CHECK-GI-FP16-NEXT:    fcvt d22, h22
+; CHECK-GI-FP16-NEXT:    fcvt d23, h23
+; CHECK-GI-FP16-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-GI-FP16-NEXT:    fcvt d6, h6
+; CHECK-GI-FP16-NEXT:    fcvt d5, h5
+; CHECK-GI-FP16-NEXT:    fcvtzu v19.2d, v19.2d
+; CHECK-GI-FP16-NEXT:    mov v3.d[1], v21.d[0]
+; CHECK-GI-FP16-NEXT:    fcvtzu v20.2d, v20.2d
+; CHECK-GI-FP16-NEXT:    stp q0, q4, [x8, #64]
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.2d, v17.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v4.2d, v18.2d
+; CHECK-GI-FP16-NEXT:    mov v16.d[1], v7.d[0]
+; CHECK-GI-FP16-NEXT:    mov v22.d[1], v23.d[0]
+; CHECK-GI-FP16-NEXT:    mov v6.d[1], v5.d[0]
+; CHECK-GI-FP16-NEXT:    stp q1, q19, [x8, #96]
+; CHECK-GI-FP16-NEXT:    fcvtzu v1.2d, v2.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v2.2d, v3.2d
+; CHECK-GI-FP16-NEXT:    stp q20, q0, [x8, #128]
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.2d, v16.2d
+; CHECK-GI-FP16-NEXT:    fcvtzu v3.2d, v22.2d
+; CHECK-GI-FP16-NEXT:    stp q4, q1, [x8, #160]
+; CHECK-GI-FP16-NEXT:    fcvtzu v1.2d, v6.2d
+; CHECK-GI-FP16-NEXT:    stp q2, q0, [x8, #192]
+; CHECK-GI-FP16-NEXT:    stp q3, q1, [x8, #224]
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui <32 x half> %a to <32 x i64>
@@ -4404,24 +4988,48 @@ entry:
 }
 
 define <2 x i32> @fptos_v2f16_v2i32(<2 x half> %a) {
-; CHECK-LABEL: fptos_v2f16_v2i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v2f16_v2i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v2f16_v2i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT:    mov v0.h[2], v0.h[0]
+; CHECK-GI-NEXT:    mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <2 x half> %a to <2 x i32>
   ret <2 x i32> %c
 }
 
 define <2 x i32> @fptou_v2f16_v2i32(<2 x half> %a) {
-; CHECK-LABEL: fptou_v2f16_v2i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v2f16_v2i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v2f16_v2i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT:    mov v0.h[2], v0.h[0]
+; CHECK-GI-NEXT:    mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NEXT:    fcvtzu v0.2s, v0.2s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <2 x half> %a to <2 x i32>
   ret <2 x i32> %c
@@ -4472,134 +5080,252 @@ entry:
 }
 
 define <8 x i32> @fptos_v8f16_v8i32(<8 x half> %a) {
-; CHECK-LABEL: fptos_v8f16_v8i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl2 v1.4s, v0.8h
-; CHECK-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v8f16_v8i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl2 v1.4s, v0.8h
+; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v8f16_v8i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-GI-NEXT:    fcvtl2 v2.4s, v0.8h
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v2.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <8 x half> %a to <8 x i32>
   ret <8 x i32> %c
 }
 
 define <8 x i32> @fptou_v8f16_v8i32(<8 x half> %a) {
-; CHECK-LABEL: fptou_v8f16_v8i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl2 v1.4s, v0.8h
-; CHECK-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v8f16_v8i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl2 v1.4s, v0.8h
+; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v8f16_v8i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-GI-NEXT:    fcvtl2 v2.4s, v0.8h
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v1.4s
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v2.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <8 x half> %a to <8 x i32>
   ret <8 x i32> %c
 }
 
 define <16 x i32> @fptos_v16f16_v16i32(<16 x half> %a) {
-; CHECK-LABEL: fptos_v16f16_v16i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl v2.4s, v0.4h
-; CHECK-NEXT:    fcvtl2 v3.4s, v0.8h
-; CHECK-NEXT:    fcvtl2 v4.4s, v1.8h
-; CHECK-NEXT:    fcvtl v5.4s, v1.4h
-; CHECK-NEXT:    fcvtzs v0.4s, v2.4s
-; CHECK-NEXT:    fcvtzs v1.4s, v3.4s
-; CHECK-NEXT:    fcvtzs v3.4s, v4.4s
-; CHECK-NEXT:    fcvtzs v2.4s, v5.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v16f16_v16i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtl2 v3.4s, v0.8h
+; CHECK-SD-NEXT:    fcvtl2 v4.4s, v1.8h
+; CHECK-SD-NEXT:    fcvtl v5.4s, v1.4h
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v2.4s
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v3.4s
+; CHECK-SD-NEXT:    fcvtzs v3.4s, v4.4s
+; CHECK-SD-NEXT:    fcvtzs v2.4s, v5.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v16f16_v16i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-GI-NEXT:    fcvtl2 v3.4s, v0.8h
+; CHECK-GI-NEXT:    fcvtl v4.4s, v1.4h
+; CHECK-GI-NEXT:    fcvtl2 v5.4s, v1.8h
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v2.4s
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v3.4s
+; CHECK-GI-NEXT:    fcvtzs v2.4s, v4.4s
+; CHECK-GI-NEXT:    fcvtzs v3.4s, v5.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <16 x half> %a to <16 x i32>
   ret <16 x i32> %c
 }
 
 define <16 x i32> @fptou_v16f16_v16i32(<16 x half> %a) {
-; CHECK-LABEL: fptou_v16f16_v16i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl v2.4s, v0.4h
-; CHECK-NEXT:    fcvtl2 v3.4s, v0.8h
-; CHECK-NEXT:    fcvtl2 v4.4s, v1.8h
-; CHECK-NEXT:    fcvtl v5.4s, v1.4h
-; CHECK-NEXT:    fcvtzu v0.4s, v2.4s
-; CHECK-NEXT:    fcvtzu v1.4s, v3.4s
-; CHECK-NEXT:    fcvtzu v3.4s, v4.4s
-; CHECK-NEXT:    fcvtzu v2.4s, v5.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v16f16_v16i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtl2 v3.4s, v0.8h
+; CHECK-SD-NEXT:    fcvtl2 v4.4s, v1.8h
+; CHECK-SD-NEXT:    fcvtl v5.4s, v1.4h
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v2.4s
+; CHECK-SD-NEXT:    fcvtzu v1.4s, v3.4s
+; CHECK-SD-NEXT:    fcvtzu v3.4s, v4.4s
+; CHECK-SD-NEXT:    fcvtzu v2.4s, v5.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v16f16_v16i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-GI-NEXT:    fcvtl2 v3.4s, v0.8h
+; CHECK-GI-NEXT:    fcvtl v4.4s, v1.4h
+; CHECK-GI-NEXT:    fcvtl2 v5.4s, v1.8h
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v2.4s
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v3.4s
+; CHECK-GI-NEXT:    fcvtzu v2.4s, v4.4s
+; CHECK-GI-NEXT:    fcvtzu v3.4s, v5.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <16 x half> %a to <16 x i32>
   ret <16 x i32> %c
 }
 
 define <32 x i32> @fptos_v32f16_v32i32(<32 x half> %a) {
-; CHECK-LABEL: fptos_v32f16_v32i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl2 v4.4s, v0.8h
-; CHECK-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-NEXT:    fcvtl2 v5.4s, v1.8h
-; CHECK-NEXT:    fcvtl v6.4s, v1.4h
-; CHECK-NEXT:    fcvtl v7.4s, v2.4h
-; CHECK-NEXT:    fcvtl2 v16.4s, v2.8h
-; CHECK-NEXT:    fcvtl2 v17.4s, v3.8h
-; CHECK-NEXT:    fcvtl v18.4s, v3.4h
-; CHECK-NEXT:    fcvtzs v1.4s, v4.4s
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    fcvtzs v3.4s, v5.4s
-; CHECK-NEXT:    fcvtzs v2.4s, v6.4s
-; CHECK-NEXT:    fcvtzs v4.4s, v7.4s
-; CHECK-NEXT:    fcvtzs v5.4s, v16.4s
-; CHECK-NEXT:    fcvtzs v7.4s, v17.4s
-; CHECK-NEXT:    fcvtzs v6.4s, v18.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v32f16_v32i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl2 v4.4s, v0.8h
+; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtl2 v5.4s, v1.8h
+; CHECK-SD-NEXT:    fcvtl v6.4s, v1.4h
+; CHECK-SD-NEXT:    fcvtl v7.4s, v2.4h
+; CHECK-SD-NEXT:    fcvtl2 v16.4s, v2.8h
+; CHECK-SD-NEXT:    fcvtl2 v17.4s, v3.8h
+; CHECK-SD-NEXT:    fcvtl v18.4s, v3.4h
+; CHECK-SD-NEXT:    fcvtzs v1.4s, v4.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    fcvtzs v3.4s, v5.4s
+; CHECK-SD-NEXT:    fcvtzs v2.4s, v6.4s
+; CHECK-SD-NEXT:    fcvtzs v4.4s, v7.4s
+; CHECK-SD-NEXT:    fcvtzs v5.4s, v16.4s
+; CHECK-SD-NEXT:    fcvtzs v7.4s, v17.4s
+; CHECK-SD-NEXT:    fcvtzs v6.4s, v18.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_v32f16_v32i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtl v4.4s, v0.4h
+; CHECK-GI-NEXT:    fcvtl2 v5.4s, v0.8h
+; CHECK-GI-NEXT:    fcvtl v6.4s, v1.4h
+; CHECK-GI-NEXT:    fcvtl2 v7.4s, v1.8h
+; CHECK-GI-NEXT:    fcvtl v16.4s, v2.4h
+; CHECK-GI-NEXT:    fcvtl2 v17.4s, v2.8h
+; CHECK-GI-NEXT:    fcvtl v18.4s, v3.4h
+; CHECK-GI-NEXT:    fcvtl2 v19.4s, v3.8h
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v4.4s
+; CHECK-GI-NEXT:    fcvtzs v1.4s, v5.4s
+; CHECK-GI-NEXT:    fcvtzs v2.4s, v6.4s
+; CHECK-GI-NEXT:    fcvtzs v3.4s, v7.4s
+; CHECK-GI-NEXT:    fcvtzs v4.4s, v16.4s
+; CHECK-GI-NEXT:    fcvtzs v5.4s, v17.4s
+; CHECK-GI-NEXT:    fcvtzs v6.4s, v18.4s
+; CHECK-GI-NEXT:    fcvtzs v7.4s, v19.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <32 x half> %a to <32 x i32>
   ret <32 x i32> %c
 }
 
 define <32 x i32> @fptou_v32f16_v32i32(<32 x half> %a) {
-; CHECK-LABEL: fptou_v32f16_v32i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl2 v4.4s, v0.8h
-; CHECK-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-NEXT:    fcvtl2 v5.4s, v1.8h
-; CHECK-NEXT:    fcvtl v6.4s, v1.4h
-; CHECK-NEXT:    fcvtl v7.4s, v2.4h
-; CHECK-NEXT:    fcvtl2 v16.4s, v2.8h
-; CHECK-NEXT:    fcvtl2 v17.4s, v3.8h
-; CHECK-NEXT:    fcvtl v18.4s, v3.4h
-; CHECK-NEXT:    fcvtzu v1.4s, v4.4s
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    fcvtzu v3.4s, v5.4s
-; CHECK-NEXT:    fcvtzu v2.4s, v6.4s
-; CHECK-NEXT:    fcvtzu v4.4s, v7.4s
-; CHECK-NEXT:    fcvtzu v5.4s, v16.4s
-; CHECK-NEXT:    fcvtzu v7.4s, v17.4s
-; CHECK-NEXT:    fcvtzu v6.4s, v18.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v32f16_v32i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl2 v4.4s, v0.8h
+; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtl2 v5.4s, v1.8h
+; CHECK-SD-NEXT:    fcvtl v6.4s, v1.4h
+; CHECK-SD-NEXT:    fcvtl v7.4s, v2.4h
+; CHECK-SD-NEXT:    fcvtl2 v16.4s, v2.8h
+; CHECK-SD-NEXT:    fcvtl2 v17.4s, v3.8h
+; CHECK-SD-NEXT:    fcvtl v18.4s, v3.4h
+; CHECK-SD-NEXT:    fcvtzu v1.4s, v4.4s
+; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    fcvtzu v3.4s, v5.4s
+; CHECK-SD-NEXT:    fcvtzu v2.4s, v6.4s
+; CHECK-SD-NEXT:    fcvtzu v4.4s, v7.4s
+; CHECK-SD-NEXT:    fcvtzu v5.4s, v16.4s
+; CHECK-SD-NEXT:    fcvtzu v7.4s, v17.4s
+; CHECK-SD-NEXT:    fcvtzu v6.4s, v18.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_v32f16_v32i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fcvtl v4.4s, v0.4h
+; CHECK-GI-NEXT:    fcvtl2 v5.4s, v0.8h
+; CHECK-GI-NEXT:    fcvtl v6.4s, v1.4h
+; CHECK-GI-NEXT:    fcvtl2 v7.4s, v1.8h
+; CHECK-GI-NEXT:    fcvtl v16.4s, v2.4h
+; CHECK-GI-NEXT:    fcvtl2 v17.4s, v2.8h
+; CHECK-GI-NEXT:    fcvtl v18.4s, v3.4h
+; CHECK-GI-NEXT:    fcvtl2 v19.4s, v3.8h
+; CHECK-GI-NEXT:    fcvtzu v0.4s, v4.4s
+; CHECK-GI-NEXT:    fcvtzu v1.4s, v5.4s
+; CHECK-GI-NEXT:    fcvtzu v2.4s, v6.4s
+; CHECK-GI-NEXT:    fcvtzu v3.4s, v7.4s
+; CHECK-GI-NEXT:    fcvtzu v4.4s, v16.4s
+; CHECK-GI-NEXT:    fcvtzu v5.4s, v17.4s
+; CHECK-GI-NEXT:    fcvtzu v6.4s, v18.4s
+; CHECK-GI-NEXT:    fcvtzu v7.4s, v19.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <32 x half> %a to <32 x i32>
   ret <32 x i32> %c
 }
 
 define <2 x i16> @fptos_v2f16_v2i16(<2 x half> %a) {
-; CHECK-LABEL: fptos_v2f16_v2i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v2f16_v2i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fmov x8, d0
+; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi <2 x half> %a to <2 x i16>
   ret <2 x i16> %c
 }
 
 define <2 x i16> @fptou_v2f16_v2i16(<2 x half> %a) {
-; CHECK-LABEL: fptou_v2f16_v2i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v2f16_v2i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fmov x8, d0
+; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.2s, v0.2s
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui <2 x half> %a to <2 x i16>
   ret <2 x i16> %c
@@ -4622,7 +5348,12 @@ define <3 x i16> @fptos_v3f16_v3i16(<3 x half> %a) {
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
 ; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-NOFP16-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-NOFP16-NEXT:    mov s2, v0.s[2]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v3f16_v3i16:
@@ -4651,7 +5382,12 @@ define <3 x i16> @fptou_v3f16_v3i16(<3 x half> %a) {
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
 ; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-NOFP16-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-NOFP16-NEXT:    mov s2, v0.s[2]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v3f16_v3i16:
@@ -4738,11 +5474,11 @@ define <8 x i16> @fptos_v8f16_v8i16(<8 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v8f16_v8i16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v1.4s, v1.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v8f16_v8i16:
@@ -4771,11 +5507,11 @@ define <8 x i16> @fptou_v8f16_v8i16(<8 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v8f16_v8i16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v1.4s, v1.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v8f16_v8i16:
@@ -4810,16 +5546,16 @@ define <16 x i16> @fptos_v16f16_v16i16(<16 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v16f16_v16i16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v3.4s, v1.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v3.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v2.4s, v2.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.4s, v0.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v3.4s, v3.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-GI-NOFP16-NEXT:    uzp1 v1.8h, v1.8h, v3.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v2.8h, v0.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v1.8h, v3.8h, v1.8h
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v16f16_v16i16:
@@ -4855,16 +5591,16 @@ define <16 x i16> @fptou_v16f16_v16i16(<16 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v16f16_v16i16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v3.4s, v1.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v3.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v2.4s, v2.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.4s, v0.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v3.4s, v3.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-GI-NOFP16-NEXT:    uzp1 v1.8h, v1.8h, v3.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v2.8h, v0.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v1.8h, v3.8h, v1.8h
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v16f16_v16i16:
@@ -4912,14 +5648,14 @@ define <32 x i16> @fptos_v32f16_v32i16(<32 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v32f16_v32i16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v4.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v5.4s, v1.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v6.4s, v2.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v2.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v7.4s, v3.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v3.4s, v3.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v4.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v5.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v6.4s, v2.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.4s, v2.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v7.4s, v3.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v3.4s, v3.8h
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v4.4s, v4.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.4s, v0.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v5.4s, v5.4s
@@ -4928,10 +5664,10 @@ define <32 x i16> @fptos_v32f16_v32i16(<32 x half> %a) {
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v2.4s, v2.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v7.4s, v7.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v3.4s, v3.4s
-; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v0.8h, v4.8h
-; CHECK-GI-NOFP16-NEXT:    uzp1 v1.8h, v1.8h, v5.8h
-; CHECK-GI-NOFP16-NEXT:    uzp1 v2.8h, v2.8h, v6.8h
-; CHECK-GI-NOFP16-NEXT:    uzp1 v3.8h, v3.8h, v7.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v4.8h, v0.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v1.8h, v5.8h, v1.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v2.8h, v6.8h, v2.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v3.8h, v7.8h, v3.8h
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v32f16_v32i16:
@@ -4981,14 +5717,14 @@ define <32 x i16> @fptou_v32f16_v32i16(<32 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v32f16_v32i16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v4.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v5.4s, v1.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v6.4s, v2.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v2.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v7.4s, v3.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v3.4s, v3.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v4.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v5.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v6.4s, v2.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.4s, v2.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v7.4s, v3.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v3.4s, v3.8h
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v4.4s, v4.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.4s, v0.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v5.4s, v5.4s
@@ -4997,10 +5733,10 @@ define <32 x i16> @fptou_v32f16_v32i16(<32 x half> %a) {
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v2.4s, v2.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v7.4s, v7.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v3.4s, v3.4s
-; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v0.8h, v4.8h
-; CHECK-GI-NOFP16-NEXT:    uzp1 v1.8h, v1.8h, v5.8h
-; CHECK-GI-NOFP16-NEXT:    uzp1 v2.8h, v2.8h, v6.8h
-; CHECK-GI-NOFP16-NEXT:    uzp1 v3.8h, v3.8h, v7.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v4.8h, v0.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v1.8h, v5.8h, v1.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v2.8h, v6.8h, v2.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v3.8h, v7.8h, v3.8h
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v32f16_v32i16:
@@ -5016,24 +5752,62 @@ entry:
 }
 
 define <2 x i8> @fptos_v2f16_v2i8(<2 x half> %a) {
-; CHECK-LABEL: fptos_v2f16_v2i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v2f16_v2i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i8:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fmov x8, d0
+; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i8:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi <2 x half> %a to <2 x i8>
   ret <2 x i8> %c
 }
 
 define <2 x i8> @fptou_v2f16_v2i8(<2 x half> %a) {
-; CHECK-LABEL: fptou_v2f16_v2i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_v2f16_v2i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i8:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fmov x8, d0
+; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.2s, v0.2s
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i8:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui <2 x half> %a to <2 x i8>
   ret <2 x i8> %c
@@ -5062,18 +5836,21 @@ define <3 x i8> @fptos_v3f16_v3i8(<3 x half> %a) {
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
 ; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    xtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT:    umov w0, v0.h[0]
-; CHECK-GI-NOFP16-NEXT:    umov w1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    umov w2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-NOFP16-NEXT:    mov s2, v0.s[2]
+; CHECK-GI-NOFP16-NEXT:    fmov w0, s0
+; CHECK-GI-NOFP16-NEXT:    fmov w1, s1
+; CHECK-GI-NOFP16-NEXT:    fmov w2, s2
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v3f16_v3i8:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
 ; CHECK-GI-FP16-NEXT:    fcvtzs v0.4h, v0.4h
-; CHECK-GI-FP16-NEXT:    umov w0, v0.h[0]
-; CHECK-GI-FP16-NEXT:    umov w1, v0.h[1]
-; CHECK-GI-FP16-NEXT:    umov w2, v0.h[2]
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT:    fmov w0, s0
+; CHECK-GI-FP16-NEXT:    fmov w1, s1
+; CHECK-GI-FP16-NEXT:    fmov w2, s2
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi <3 x half> %a to <3 x i8>
@@ -5102,19 +5879,22 @@ define <3 x i8> @fptou_v3f16_v3i8(<3 x half> %a) {
 ; CHECK-GI-NOFP16-LABEL: fptou_v3f16_v3i8:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
 ; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    xtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT:    umov w0, v0.h[0]
-; CHECK-GI-NOFP16-NEXT:    umov w1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    umov w2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-NOFP16-NEXT:    mov s2, v0.s[2]
+; CHECK-GI-NOFP16-NEXT:    fmov w0, s0
+; CHECK-GI-NOFP16-NEXT:    fmov w1, s1
+; CHECK-GI-NOFP16-NEXT:    fmov w2, s2
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v3f16_v3i8:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    fcvtzs v0.4h, v0.4h
-; CHECK-GI-FP16-NEXT:    umov w0, v0.h[0]
-; CHECK-GI-FP16-NEXT:    umov w1, v0.h[1]
-; CHECK-GI-FP16-NEXT:    umov w2, v0.h[2]
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT:    fmov w0, s0
+; CHECK-GI-FP16-NEXT:    fmov w1, s1
+; CHECK-GI-FP16-NEXT:    fmov w2, s2
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui <3 x half> %a to <3 x i8>
@@ -5166,13 +5946,13 @@ define <4 x i8> @fptou_v4f16_v4i8(<4 x half> %a) {
 ; CHECK-GI-NOFP16-LABEL: fptou_v4f16_v4i8:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
 ; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.4s, v0.4s
 ; CHECK-GI-NOFP16-NEXT:    xtn v0.4h, v0.4s
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v4f16_v4i8:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    fcvtzs v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.4h, v0.4h
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui <4 x half> %a to <4 x i8>
@@ -5198,11 +5978,11 @@ define <8 x i8> @fptos_v8f16_v8i8(<8 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v8f16_v8i8:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v1.4s, v1.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-GI-NOFP16-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
@@ -5235,11 +6015,11 @@ define <8 x i8> @fptou_v8f16_v8i8(<8 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v8f16_v8i8:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v1.4s, v1.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-GI-NOFP16-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
@@ -5278,23 +6058,23 @@ define <16 x i8> @fptos_v16f16_v16i8(<16 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v16f16_v16i8:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.4s, v1.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v3.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v3.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtzs v3.4s, v3.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    uzp1 v1.8h, v1.8h, v2.8h
-; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v0.8h, v3.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v3.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v2.8h, v0.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v1.8h, v3.8h, v1.8h
 ; CHECK-GI-NOFP16-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v16f16_v16i8:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    fcvtzs v1.8h, v1.8h
 ; CHECK-GI-FP16-NEXT:    fcvtzs v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    fcvtzs v1.8h, v1.8h
 ; CHECK-GI-FP16-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
@@ -5327,23 +6107,23 @@ define <16 x i8> @fptou_v16f16_v16i8(<16 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v16f16_v16i8:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.4s, v1.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v3.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v3.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v2.4s, v2.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtzu v3.4s, v3.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    uzp1 v1.8h, v1.8h, v2.8h
-; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v0.8h, v3.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v3.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v2.8h, v0.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v1.8h, v3.8h, v1.8h
 ; CHECK-GI-NOFP16-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v16f16_v16i8:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    fcvtzu v1.8h, v1.8h
 ; CHECK-GI-FP16-NEXT:    fcvtzu v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    fcvtzu v1.8h, v1.8h
 ; CHECK-GI-FP16-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
@@ -5390,36 +6170,36 @@ define <32 x i8> @fptos_v32f16_v32i8(<32 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v32f16_v32i8:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v4.4s, v1.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v5.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v6.4s, v3.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v3.4s, v3.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v7.4s, v2.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v2.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v4.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v5.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v6.4s, v2.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.4s, v2.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v7.4s, v3.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v3.4s, v3.8h
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v4.4s, v4.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtzs v5.4s, v5.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v5.4s, v5.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v1.4s, v1.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v6.4s, v6.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtzs v3.4s, v3.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtzs v7.4s, v7.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-GI-NOFP16-NEXT:    uzp1 v1.8h, v1.8h, v4.8h
-; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v0.8h, v5.8h
-; CHECK-GI-NOFP16-NEXT:    uzp1 v3.8h, v3.8h, v6.8h
-; CHECK-GI-NOFP16-NEXT:    uzp1 v2.8h, v2.8h, v7.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v7.4s, v7.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtzs v3.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v4.8h, v0.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v1.8h, v5.8h, v1.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v2.8h, v6.8h, v2.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v3.8h, v7.8h, v3.8h
 ; CHECK-GI-NOFP16-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NOFP16-NEXT:    uzp1 v1.16b, v2.16b, v3.16b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v32f16_v32i8:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    fcvtzs v1.8h, v1.8h
 ; CHECK-GI-FP16-NEXT:    fcvtzs v0.8h, v0.8h
-; CHECK-GI-FP16-NEXT:    fcvtzs v3.8h, v3.8h
+; CHECK-GI-FP16-NEXT:    fcvtzs v1.8h, v1.8h
 ; CHECK-GI-FP16-NEXT:    fcvtzs v2.8h, v2.8h
+; CHECK-GI-FP16-NEXT:    fcvtzs v3.8h, v3.8h
 ; CHECK-GI-FP16-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
 ; CHECK-GI-FP16-NEXT:    uzp1 v1.16b, v2.16b, v3.16b
 ; CHECK-GI-FP16-NEXT:    ret
@@ -5467,36 +6247,36 @@ define <32 x i8> @fptou_v32f16_v32i8(<32 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v32f16_v32i8:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v4.4s, v1.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v5.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v6.4s, v3.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v3.4s, v3.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v7.4s, v2.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v2.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v4.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v5.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v6.4s, v2.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.4s, v2.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v7.4s, v3.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v3.4s, v3.8h
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v4.4s, v4.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtzu v5.4s, v5.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v5.4s, v5.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v1.4s, v1.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v6.4s, v6.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtzu v3.4s, v3.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtzu v7.4s, v7.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v2.4s, v2.4s
-; CHECK-GI-NOFP16-NEXT:    uzp1 v1.8h, v1.8h, v4.8h
-; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v0.8h, v5.8h
-; CHECK-GI-NOFP16-NEXT:    uzp1 v3.8h, v3.8h, v6.8h
-; CHECK-GI-NOFP16-NEXT:    uzp1 v2.8h, v2.8h, v7.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v7.4s, v7.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtzu v3.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT:    uzp1 v0.8h, v4.8h, v0.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v1.8h, v5.8h, v1.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v2.8h, v6.8h, v2.8h
+; CHECK-GI-NOFP16-NEXT:    uzp1 v3.8h, v7.8h, v3.8h
 ; CHECK-GI-NOFP16-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NOFP16-NEXT:    uzp1 v1.16b, v2.16b, v3.16b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v32f16_v32i8:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    fcvtzu v1.8h, v1.8h
 ; CHECK-GI-FP16-NEXT:    fcvtzu v0.8h, v0.8h
-; CHECK-GI-FP16-NEXT:    fcvtzu v3.8h, v3.8h
+; CHECK-GI-FP16-NEXT:    fcvtzu v1.8h, v1.8h
 ; CHECK-GI-FP16-NEXT:    fcvtzu v2.8h, v2.8h
+; CHECK-GI-FP16-NEXT:    fcvtzu v3.8h, v3.8h
 ; CHECK-GI-FP16-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
 ; CHECK-GI-FP16-NEXT:    uzp1 v1.16b, v2.16b, v3.16b
 ; CHECK-GI-FP16-NEXT:    ret


        


More information about the llvm-commits mailing list