[llvm] [AArch64] Wrap integer SCALAR_TO_VECTOR nodes in bitcasts (PR #172837)

via llvm-commits llvm-commits@lists.llvm.org
Thu Jan 22 04:41:55 PST 2026


https://github.com/Lukacma updated https://github.com/llvm/llvm-project/pull/172837

>From f453fd3056b9ffbdf377bc4b5607ef3569049b8b Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac@arm.com>
Date: Thu, 18 Dec 2025 11:16:41 +0000
Subject: [PATCH 1/8] [AArch64] Add scal_to_vec patterns for SIMD convert
 intrinsics

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |   17 +-
 .../CodeGen/AArch64/arm64-cvt-simd-fptoi.ll   |  170 ++
 .../CodeGen/AArch64/arm64-cvt-simd-fptoi.s    | 1515 +++++++++++++++++
 .../AArch64/arm64-cvt-simd-intrinsics.ll      |  334 +++-
 ...arm64-fixed-point-scalar-cvt-dagcombine.ll |    3 +-
 llvm/test/CodeGen/AArch64/arm64-neon-copy.ll  |   57 +-
 llvm/test/CodeGen/AArch64/arm64-vcvt.ll       |   28 +-
 .../CodeGen/AArch64/fp-intrinsics-vector.ll   |    6 +-
 .../AArch64/sve-fixed-length-fp-to-int.ll     |    6 +-
 9 files changed, 2064 insertions(+), 72 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index c22929f379dfc..447fd9ef66343 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6563,12 +6563,19 @@ multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
             (!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
   def : Pat<(f64 (bitconvert (i64 (OpN (f32 FPR32:$Rn))))),
             (!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
+
+  def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f16 FPR16:$Rn))))),
+            (!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
+  def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f32 FPR32:$Rn))))),
+            (!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
   }
   def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))),
             (!cast<Instruction>(INST # v1i32) FPR32:$Rn)>;
   def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))),
             (!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
-            
+
+  def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f64 FPR64:$Rn))))),
+            (!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
 }
 defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtas, "FCVTAS">;
 defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtau, "FCVTAU">;
@@ -6611,12 +6618,20 @@ multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
             (!cast<Instruction>(INST # DSr) $Rn)>;
   def : Pat<(f32 (bitconvert (i32 (round f64:$Rn)))), 
             (!cast<Instruction>(INST # SDr) $Rn)>;
+
+  def : Pat<(v1i64 (scalar_to_vector (i64 (round f16:$Rn)))), 
+            (!cast<Instruction>(INST # DHr) $Rn)>;
+  def : Pat<(v1i64 (scalar_to_vector (i64 (round f32:$Rn)))), 
+            (!cast<Instruction>(INST # DSr) $Rn)>;
   }
   def : Pat<(f32 (bitconvert (i32 (round f32:$Rn)))), 
             (!cast<Instruction>(INST # v1i32) $Rn)>;
   def : Pat<(f64 (bitconvert (i64 (round f64:$Rn)))), 
             (!cast<Instruction>(INST # v1i64) $Rn)>;
 
+  def : Pat<(v1i64 (scalar_to_vector (i64 (round f64:$Rn)))), 
+            (!cast<Instruction>(INST # v1i64) $Rn)>;
+
   let Predicates = [HasFullFP16] in {
   def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
             (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
index a729772f2897a..ebaca00d2cdb9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
@@ -15,6 +15,10 @@
 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f64_simd
 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f64_simd
 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f32_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzs_scalar_to_vector_h_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzs_scalar_to_vector_s_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzu_scalar_to_vector_h_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzu_scalar_to_vector_s_strict
 
 ;
 ; FPTOI
@@ -1941,3 +1945,169 @@ define double @fcvtzu_dd_simd(double %a) {
   %bc = bitcast i64 %i to double
   ret double %bc
 }
+
+;
+; FPTOI scalar_to_vector
+;
+
+define <1 x i64> @fcvtzs_scalar_to_vector_h(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_h:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_scalar_to_vector_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
+  %val = fptosi half %a to i64
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzs_scalar_to_vector_s(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_s:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_scalar_to_vector_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %val = fptosi float %a to i64
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzs_scalar_to_vector_d(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_d:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_scalar_to_vector_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %val = fptosi double %a to i64
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzu_scalar_to_vector_h(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_h:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_scalar_to_vector_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, h0
+; CHECK-NEXT:    ret
+  %val = fptoui half %a to i64
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzu_scalar_to_vector_s(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_s:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_scalar_to_vector_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, s0
+; CHECK-NEXT:    ret
+  %val = fptoui float %a to i64
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzu_scalar_to_vector_d(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_d:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_scalar_to_vector_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, d0
+; CHECK-NEXT:    ret
+  %val = fptoui double %a to i64
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
+
+;
+; FPTOI scalar_to_vector strictfp
+;
+
+define <1 x i64> @fcvtzs_scalar_to_vector_h_strict(half %x) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_h_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_scalar_to_vector_h_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict")
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzs_scalar_to_vector_s_strict(float %x) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_s_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_scalar_to_vector_s_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict")
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzu_scalar_to_vector_h_strict(half %x) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_h_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_scalar_to_vector_h_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, h0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict")
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzu_scalar_to_vector_s_strict(float %x) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_s_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_scalar_to_vector_s_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, s0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict")
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s
new file mode 100644
index 0000000000000..0850b306e8c79
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s
@@ -0,0 +1,1515 @@
+	.file	"arm64-cvt-simd-fptoi.ll"
+	.text
+	.globl	test_fptosi_f16_i32_simd        // -- Begin function test_fptosi_f16_i32_simd
+	.p2align	2
+	.type	test_fptosi_f16_i32_simd,@function
+test_fptosi_f16_i32_simd:               // @test_fptosi_f16_i32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, h0
+	ret
+.Lfunc_end0:
+	.size	test_fptosi_f16_i32_simd, .Lfunc_end0-test_fptosi_f16_i32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptosi_f16_i64_simd        // -- Begin function test_fptosi_f16_i64_simd
+	.p2align	2
+	.type	test_fptosi_f16_i64_simd,@function
+test_fptosi_f16_i64_simd:               // @test_fptosi_f16_i64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, h0
+	ret
+.Lfunc_end1:
+	.size	test_fptosi_f16_i64_simd, .Lfunc_end1-test_fptosi_f16_i64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptosi_f64_i32_simd        // -- Begin function test_fptosi_f64_i32_simd
+	.p2align	2
+	.type	test_fptosi_f64_i32_simd,@function
+test_fptosi_f64_i32_simd:               // @test_fptosi_f64_i32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, d0
+	ret
+.Lfunc_end2:
+	.size	test_fptosi_f64_i32_simd, .Lfunc_end2-test_fptosi_f64_i32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptosi_f32_i64_simd        // -- Begin function test_fptosi_f32_i64_simd
+	.p2align	2
+	.type	test_fptosi_f32_i64_simd,@function
+test_fptosi_f32_i64_simd:               // @test_fptosi_f32_i64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, s0
+	ret
+.Lfunc_end3:
+	.size	test_fptosi_f32_i64_simd, .Lfunc_end3-test_fptosi_f32_i64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptosi_f64_i64_simd        // -- Begin function test_fptosi_f64_i64_simd
+	.p2align	2
+	.type	test_fptosi_f64_i64_simd,@function
+test_fptosi_f64_i64_simd:               // @test_fptosi_f64_i64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, d0
+	ret
+.Lfunc_end4:
+	.size	test_fptosi_f64_i64_simd, .Lfunc_end4-test_fptosi_f64_i64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptosi_f32_i32_simd        // -- Begin function test_fptosi_f32_i32_simd
+	.p2align	2
+	.type	test_fptosi_f32_i32_simd,@function
+test_fptosi_f32_i32_simd:               // @test_fptosi_f32_i32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, s0
+	ret
+.Lfunc_end5:
+	.size	test_fptosi_f32_i32_simd, .Lfunc_end5-test_fptosi_f32_i32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptoui_f16_i32_simd        // -- Begin function test_fptoui_f16_i32_simd
+	.p2align	2
+	.type	test_fptoui_f16_i32_simd,@function
+test_fptoui_f16_i32_simd:               // @test_fptoui_f16_i32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	s0, h0
+	ret
+.Lfunc_end6:
+	.size	test_fptoui_f16_i32_simd, .Lfunc_end6-test_fptoui_f16_i32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptoui_f16_i64_simd        // -- Begin function test_fptoui_f16_i64_simd
+	.p2align	2
+	.type	test_fptoui_f16_i64_simd,@function
+test_fptoui_f16_i64_simd:               // @test_fptoui_f16_i64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, h0
+	ret
+.Lfunc_end7:
+	.size	test_fptoui_f16_i64_simd, .Lfunc_end7-test_fptoui_f16_i64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptoui_f64_i32_simd        // -- Begin function test_fptoui_f64_i32_simd
+	.p2align	2
+	.type	test_fptoui_f64_i32_simd,@function
+test_fptoui_f64_i32_simd:               // @test_fptoui_f64_i32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	s0, d0
+	ret
+.Lfunc_end8:
+	.size	test_fptoui_f64_i32_simd, .Lfunc_end8-test_fptoui_f64_i32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptoui_f32_i64_simd        // -- Begin function test_fptoui_f32_i64_simd
+	.p2align	2
+	.type	test_fptoui_f32_i64_simd,@function
+test_fptoui_f32_i64_simd:               // @test_fptoui_f32_i64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, s0
+	ret
+.Lfunc_end9:
+	.size	test_fptoui_f32_i64_simd, .Lfunc_end9-test_fptoui_f32_i64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptoui_f64_i64_simd        // -- Begin function test_fptoui_f64_i64_simd
+	.p2align	2
+	.type	test_fptoui_f64_i64_simd,@function
+test_fptoui_f64_i64_simd:               // @test_fptoui_f64_i64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, d0
+	ret
+.Lfunc_end10:
+	.size	test_fptoui_f64_i64_simd, .Lfunc_end10-test_fptoui_f64_i64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptoui_f32_i32_simd        // -- Begin function test_fptoui_f32_i32_simd
+	.p2align	2
+	.type	test_fptoui_f32_i32_simd,@function
+test_fptoui_f32_i32_simd:               // @test_fptoui_f32_i32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	s0, s0
+	ret
+.Lfunc_end11:
+	.size	test_fptoui_f32_i32_simd, .Lfunc_end11-test_fptoui_f32_i32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptosi_i32_f16_simd             // -- Begin function fptosi_i32_f16_simd
+	.p2align	2
+	.type	fptosi_i32_f16_simd,@function
+fptosi_i32_f16_simd:                    // @fptosi_i32_f16_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, h0
+	ret
+.Lfunc_end12:
+	.size	fptosi_i32_f16_simd, .Lfunc_end12-fptosi_i32_f16_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptosi_i64_f16_simd             // -- Begin function fptosi_i64_f16_simd
+	.p2align	2
+	.type	fptosi_i64_f16_simd,@function
+fptosi_i64_f16_simd:                    // @fptosi_i64_f16_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, h0
+	ret
+.Lfunc_end13:
+	.size	fptosi_i64_f16_simd, .Lfunc_end13-fptosi_i64_f16_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptosi_i64_f32_simd             // -- Begin function fptosi_i64_f32_simd
+	.p2align	2
+	.type	fptosi_i64_f32_simd,@function
+fptosi_i64_f32_simd:                    // @fptosi_i64_f32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, s0
+	ret
+.Lfunc_end14:
+	.size	fptosi_i64_f32_simd, .Lfunc_end14-fptosi_i64_f32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptosi_i32_f64_simd             // -- Begin function fptosi_i32_f64_simd
+	.p2align	2
+	.type	fptosi_i32_f64_simd,@function
+fptosi_i32_f64_simd:                    // @fptosi_i32_f64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, d0
+	ret
+.Lfunc_end15:
+	.size	fptosi_i32_f64_simd, .Lfunc_end15-fptosi_i32_f64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptosi_i64_f64_simd             // -- Begin function fptosi_i64_f64_simd
+	.p2align	2
+	.type	fptosi_i64_f64_simd,@function
+fptosi_i64_f64_simd:                    // @fptosi_i64_f64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, d0
+	ret
+.Lfunc_end16:
+	.size	fptosi_i64_f64_simd, .Lfunc_end16-fptosi_i64_f64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptosi_i32_f32_simd             // -- Begin function fptosi_i32_f32_simd
+	.p2align	2
+	.type	fptosi_i32_f32_simd,@function
+fptosi_i32_f32_simd:                    // @fptosi_i32_f32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, s0
+	ret
+.Lfunc_end17:
+	.size	fptosi_i32_f32_simd, .Lfunc_end17-fptosi_i32_f32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptoui_i32_f16_simd             // -- Begin function fptoui_i32_f16_simd
+	.p2align	2
+	.type	fptoui_i32_f16_simd,@function
+fptoui_i32_f16_simd:                    // @fptoui_i32_f16_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	s0, h0
+	ret
+.Lfunc_end18:
+	.size	fptoui_i32_f16_simd, .Lfunc_end18-fptoui_i32_f16_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptoui_i64_f16_simd             // -- Begin function fptoui_i64_f16_simd
+	.p2align	2
+	.type	fptoui_i64_f16_simd,@function
+fptoui_i64_f16_simd:                    // @fptoui_i64_f16_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, h0
+	ret
+.Lfunc_end19:
+	.size	fptoui_i64_f16_simd, .Lfunc_end19-fptoui_i64_f16_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptoui_i64_f32_simd             // -- Begin function fptoui_i64_f32_simd
+	.p2align	2
+	.type	fptoui_i64_f32_simd,@function
+fptoui_i64_f32_simd:                    // @fptoui_i64_f32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, s0
+	ret
+.Lfunc_end20:
+	.size	fptoui_i64_f32_simd, .Lfunc_end20-fptoui_i64_f32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptoui_i32_f64_simd             // -- Begin function fptoui_i32_f64_simd
+	.p2align	2
+	.type	fptoui_i32_f64_simd,@function
+fptoui_i32_f64_simd:                    // @fptoui_i32_f64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	s0, d0
+	ret
+.Lfunc_end21:
+	.size	fptoui_i32_f64_simd, .Lfunc_end21-fptoui_i32_f64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptoui_i64_f64_simd             // -- Begin function fptoui_i64_f64_simd
+	.p2align	2
+	.type	fptoui_i64_f64_simd,@function
+fptoui_i64_f64_simd:                    // @fptoui_i64_f64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, d0
+	ret
+.Lfunc_end22:
+	.size	fptoui_i64_f64_simd, .Lfunc_end22-fptoui_i64_f64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptoui_i32_f32_simd             // -- Begin function fptoui_i32_f32_simd
+	.p2align	2
+	.type	fptoui_i32_f32_simd,@function
+fptoui_i32_f32_simd:                    // @fptoui_i32_f32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	s0, s0
+	ret
+.Lfunc_end23:
+	.size	fptoui_i32_f32_simd, .Lfunc_end23-fptoui_i32_f32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtas_ds_round_simd            // -- Begin function fcvtas_ds_round_simd
+	.p2align	2
+	.type	fcvtas_ds_round_simd,@function
+fcvtas_ds_round_simd:                   // @fcvtas_ds_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtas	d0, s0
+	ret
+.Lfunc_end24:
+	.size	fcvtas_ds_round_simd, .Lfunc_end24-fcvtas_ds_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtas_sd_round_simd            // -- Begin function fcvtas_sd_round_simd
+	.p2align	2
+	.type	fcvtas_sd_round_simd,@function
+fcvtas_sd_round_simd:                   // @fcvtas_sd_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtas	s0, d0
+	ret
+.Lfunc_end25:
+	.size	fcvtas_sd_round_simd, .Lfunc_end25-fcvtas_sd_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtas_ss_round_simd            // -- Begin function fcvtas_ss_round_simd
+	.p2align	2
+	.type	fcvtas_ss_round_simd,@function
+fcvtas_ss_round_simd:                   // @fcvtas_ss_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtas	s0, s0
+	ret
+.Lfunc_end26:
+	.size	fcvtas_ss_round_simd, .Lfunc_end26-fcvtas_ss_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtas_dd_round_simd            // -- Begin function fcvtas_dd_round_simd
+	.p2align	2
+	.type	fcvtas_dd_round_simd,@function
+fcvtas_dd_round_simd:                   // @fcvtas_dd_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtas	d0, d0
+	ret
+.Lfunc_end27:
+	.size	fcvtas_dd_round_simd, .Lfunc_end27-fcvtas_dd_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtau_ds_round_simd            // -- Begin function fcvtau_ds_round_simd
+	.p2align	2
+	.type	fcvtau_ds_round_simd,@function
+fcvtau_ds_round_simd:                   // @fcvtau_ds_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtau	d0, s0
+	ret
+.Lfunc_end28:
+	.size	fcvtau_ds_round_simd, .Lfunc_end28-fcvtau_ds_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtau_sd_round_simd            // -- Begin function fcvtau_sd_round_simd
+	.p2align	2
+	.type	fcvtau_sd_round_simd,@function
+fcvtau_sd_round_simd:                   // @fcvtau_sd_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtau	s0, d0
+	ret
+.Lfunc_end29:
+	.size	fcvtau_sd_round_simd, .Lfunc_end29-fcvtau_sd_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtau_ss_round_simd            // -- Begin function fcvtau_ss_round_simd
+	.p2align	2
+	.type	fcvtau_ss_round_simd,@function
+fcvtau_ss_round_simd:                   // @fcvtau_ss_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtas	s0, s0
+	ret
+.Lfunc_end30:
+	.size	fcvtau_ss_round_simd, .Lfunc_end30-fcvtau_ss_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtau_dd_round_simd            // -- Begin function fcvtau_dd_round_simd
+	.p2align	2
+	.type	fcvtau_dd_round_simd,@function
+fcvtau_dd_round_simd:                   // @fcvtau_dd_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtas	d0, d0
+	ret
+.Lfunc_end31:
+	.size	fcvtau_dd_round_simd, .Lfunc_end31-fcvtau_dd_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtms_ds_round_simd            // -- Begin function fcvtms_ds_round_simd
+	.p2align	2
+	.type	fcvtms_ds_round_simd,@function
+fcvtms_ds_round_simd:                   // @fcvtms_ds_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtms	d0, s0
+	ret
+.Lfunc_end32:
+	.size	fcvtms_ds_round_simd, .Lfunc_end32-fcvtms_ds_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtms_sd_round_simd            // -- Begin function fcvtms_sd_round_simd
+	.p2align	2
+	.type	fcvtms_sd_round_simd,@function
+fcvtms_sd_round_simd:                   // @fcvtms_sd_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtms	s0, d0
+	ret
+.Lfunc_end33:
+	.size	fcvtms_sd_round_simd, .Lfunc_end33-fcvtms_sd_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtms_ss_round_simd            // -- Begin function fcvtms_ss_round_simd
+	.p2align	2
+	.type	fcvtms_ss_round_simd,@function
+fcvtms_ss_round_simd:                   // @fcvtms_ss_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtms	s0, s0
+	ret
+.Lfunc_end34:
+	.size	fcvtms_ss_round_simd, .Lfunc_end34-fcvtms_ss_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtms_dd_round_simd            // -- Begin function fcvtms_dd_round_simd
+	.p2align	2
+	.type	fcvtms_dd_round_simd,@function
+fcvtms_dd_round_simd:                   // @fcvtms_dd_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtms	d0, d0
+	ret
+.Lfunc_end35:
+	.size	fcvtms_dd_round_simd, .Lfunc_end35-fcvtms_dd_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtmu_ds_round_simd            // -- Begin function fcvtmu_ds_round_simd
+	.p2align	2
+	.type	fcvtmu_ds_round_simd,@function
+fcvtmu_ds_round_simd:                   // @fcvtmu_ds_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtmu	d0, s0
+	ret
+.Lfunc_end36:
+	.size	fcvtmu_ds_round_simd, .Lfunc_end36-fcvtmu_ds_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtmu_sd_round_simd            // -- Begin function fcvtmu_sd_round_simd
+	.p2align	2
+	.type	fcvtmu_sd_round_simd,@function
+fcvtmu_sd_round_simd:                   // @fcvtmu_sd_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtmu	s0, d0
+	ret
+.Lfunc_end37:
+	.size	fcvtmu_sd_round_simd, .Lfunc_end37-fcvtmu_sd_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtmu_ss_round_simd            // -- Begin function fcvtmu_ss_round_simd
+	.p2align	2
+	.type	fcvtmu_ss_round_simd,@function
+fcvtmu_ss_round_simd:                   // @fcvtmu_ss_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtms	s0, s0
+	ret
+.Lfunc_end38:
+	.size	fcvtmu_ss_round_simd, .Lfunc_end38-fcvtmu_ss_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtmu_dd_round_simd            // -- Begin function fcvtmu_dd_round_simd
+	.p2align	2
+	.type	fcvtmu_dd_round_simd,@function
+fcvtmu_dd_round_simd:                   // @fcvtmu_dd_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtms	d0, d0
+	ret
+.Lfunc_end39:
+	.size	fcvtmu_dd_round_simd, .Lfunc_end39-fcvtmu_dd_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtps_ds_round_simd            // -- Begin function fcvtps_ds_round_simd
+	.p2align	2
+	.type	fcvtps_ds_round_simd,@function
+fcvtps_ds_round_simd:                   // @fcvtps_ds_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtps	d0, s0
+	ret
+.Lfunc_end40:
+	.size	fcvtps_ds_round_simd, .Lfunc_end40-fcvtps_ds_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtps_sd_round_simd            // -- Begin function fcvtps_sd_round_simd
+	.p2align	2
+	.type	fcvtps_sd_round_simd,@function
+fcvtps_sd_round_simd:                   // @fcvtps_sd_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtps	s0, d0
+	ret
+.Lfunc_end41:
+	.size	fcvtps_sd_round_simd, .Lfunc_end41-fcvtps_sd_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtps_ss_round_simd            // -- Begin function fcvtps_ss_round_simd
+	.p2align	2
+	.type	fcvtps_ss_round_simd,@function
+fcvtps_ss_round_simd:                   // @fcvtps_ss_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtps	s0, s0
+	ret
+.Lfunc_end42:
+	.size	fcvtps_ss_round_simd, .Lfunc_end42-fcvtps_ss_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtps_dd_round_simd            // -- Begin function fcvtps_dd_round_simd
+	.p2align	2
+	.type	fcvtps_dd_round_simd,@function
+fcvtps_dd_round_simd:                   // @fcvtps_dd_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtps	d0, d0
+	ret
+.Lfunc_end43:
+	.size	fcvtps_dd_round_simd, .Lfunc_end43-fcvtps_dd_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtpu_ds_round_simd            // -- Begin function fcvtpu_ds_round_simd
+	.p2align	2
+	.type	fcvtpu_ds_round_simd,@function
+fcvtpu_ds_round_simd:                   // @fcvtpu_ds_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtpu	d0, s0
+	ret
+.Lfunc_end44:
+	.size	fcvtpu_ds_round_simd, .Lfunc_end44-fcvtpu_ds_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtpu_sd_round_simd            // -- Begin function fcvtpu_sd_round_simd
+	.p2align	2
+	.type	fcvtpu_sd_round_simd,@function
+fcvtpu_sd_round_simd:                   // @fcvtpu_sd_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtpu	s0, d0
+	ret
+.Lfunc_end45:
+	.size	fcvtpu_sd_round_simd, .Lfunc_end45-fcvtpu_sd_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtpu_ss_round_simd            // -- Begin function fcvtpu_ss_round_simd
+	.p2align	2
+	.type	fcvtpu_ss_round_simd,@function
+fcvtpu_ss_round_simd:                   // @fcvtpu_ss_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtps	s0, s0
+	ret
+.Lfunc_end46:
+	.size	fcvtpu_ss_round_simd, .Lfunc_end46-fcvtpu_ss_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtpu_dd_round_simd            // -- Begin function fcvtpu_dd_round_simd
+	.p2align	2
+	.type	fcvtpu_dd_round_simd,@function
+fcvtpu_dd_round_simd:                   // @fcvtpu_dd_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtps	d0, d0
+	ret
+.Lfunc_end47:
+	.size	fcvtpu_dd_round_simd, .Lfunc_end47-fcvtpu_dd_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_ds_round_simd            // -- Begin function fcvtzs_ds_round_simd
+	.p2align	2
+	.type	fcvtzs_ds_round_simd,@function
+fcvtzs_ds_round_simd:                   // @fcvtzs_ds_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, s0
+	ret
+.Lfunc_end48:
+	.size	fcvtzs_ds_round_simd, .Lfunc_end48-fcvtzs_ds_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_sd_round_simd            // -- Begin function fcvtzs_sd_round_simd
+	.p2align	2
+	.type	fcvtzs_sd_round_simd,@function
+fcvtzs_sd_round_simd:                   // @fcvtzs_sd_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, d0
+	ret
+.Lfunc_end49:
+	.size	fcvtzs_sd_round_simd, .Lfunc_end49-fcvtzs_sd_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_ss_round_simd            // -- Begin function fcvtzs_ss_round_simd
+	.p2align	2
+	.type	fcvtzs_ss_round_simd,@function
+fcvtzs_ss_round_simd:                   // @fcvtzs_ss_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, s0
+	ret
+.Lfunc_end50:
+	.size	fcvtzs_ss_round_simd, .Lfunc_end50-fcvtzs_ss_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_dd_round_simd            // -- Begin function fcvtzs_dd_round_simd
+	.p2align	2
+	.type	fcvtzs_dd_round_simd,@function
+fcvtzs_dd_round_simd:                   // @fcvtzs_dd_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, d0
+	ret
+.Lfunc_end51:
+	.size	fcvtzs_dd_round_simd, .Lfunc_end51-fcvtzs_dd_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_ds_round_simd            // -- Begin function fcvtzu_ds_round_simd
+	.p2align	2
+	.type	fcvtzu_ds_round_simd,@function
+fcvtzu_ds_round_simd:                   // @fcvtzu_ds_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, s0
+	ret
+.Lfunc_end52:
+	.size	fcvtzu_ds_round_simd, .Lfunc_end52-fcvtzu_ds_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_sd_round_simd            // -- Begin function fcvtzu_sd_round_simd
+	.p2align	2
+	.type	fcvtzu_sd_round_simd, at function
+fcvtzu_sd_round_simd:                   // @fcvtzu_sd_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	s0, d0
+	ret
+.Lfunc_end53:
+	.size	fcvtzu_sd_round_simd, .Lfunc_end53-fcvtzu_sd_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_ss_round_simd            // -- Begin function fcvtzu_ss_round_simd
+	.p2align	2
+	.type	fcvtzu_ss_round_simd, at function
+fcvtzu_ss_round_simd:                   // @fcvtzu_ss_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, s0                          // NOTE(review): unsigned-named test emits signed fcvtzs, but fcvtzu_ds_round_simd above emits fcvtzu -- confirm the source .ll calls the unsigned op
+	ret
+.Lfunc_end54:
+	.size	fcvtzu_ss_round_simd, .Lfunc_end54-fcvtzu_ss_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_dd_round_simd            // -- Begin function fcvtzu_dd_round_simd
+	.p2align	2
+	.type	fcvtzu_dd_round_simd, at function
+fcvtzu_dd_round_simd:                   // @fcvtzu_dd_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, d0                          // NOTE(review): unsigned-named test emits signed fcvtzs -- confirm the source .ll calls the unsigned op
+	ret
+.Lfunc_end55:
+	.size	fcvtzu_dd_round_simd, .Lfunc_end55-fcvtzu_dd_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_sh_sat_simd              // -- Begin function fcvtzs_sh_sat_simd
+	.p2align	2
+	.type	fcvtzs_sh_sat_simd, at function
+fcvtzs_sh_sat_simd:                     // @fcvtzs_sh_sat_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, h0
+	ret
+.Lfunc_end56:
+	.size	fcvtzs_sh_sat_simd, .Lfunc_end56-fcvtzs_sh_sat_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_dh_sat_simd              // -- Begin function fcvtzs_dh_sat_simd
+	.p2align	2
+	.type	fcvtzs_dh_sat_simd, at function
+fcvtzs_dh_sat_simd:                     // @fcvtzs_dh_sat_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, h0
+	ret
+.Lfunc_end57:
+	.size	fcvtzs_dh_sat_simd, .Lfunc_end57-fcvtzs_dh_sat_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_ds_sat_simd              // -- Begin function fcvtzs_ds_sat_simd
+	.p2align	2
+	.type	fcvtzs_ds_sat_simd, at function
+fcvtzs_ds_sat_simd:                     // @fcvtzs_ds_sat_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, s0
+	ret
+.Lfunc_end58:
+	.size	fcvtzs_ds_sat_simd, .Lfunc_end58-fcvtzs_ds_sat_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_sd_sat_simd              // -- Begin function fcvtzs_sd_sat_simd
+	.p2align	2
+	.type	fcvtzs_sd_sat_simd, at function
+fcvtzs_sd_sat_simd:                     // @fcvtzs_sd_sat_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, d0
+	ret
+.Lfunc_end59:
+	.size	fcvtzs_sd_sat_simd, .Lfunc_end59-fcvtzs_sd_sat_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_ss_sat_simd              // -- Begin function fcvtzs_ss_sat_simd
+	.p2align	2
+	.type	fcvtzs_ss_sat_simd, at function
+fcvtzs_ss_sat_simd:                     // @fcvtzs_ss_sat_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, s0
+	ret
+.Lfunc_end60:
+	.size	fcvtzs_ss_sat_simd, .Lfunc_end60-fcvtzs_ss_sat_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_dd_sat_simd              // -- Begin function fcvtzs_dd_sat_simd
+	.p2align	2
+	.type	fcvtzs_dd_sat_simd, at function
+fcvtzs_dd_sat_simd:                     // @fcvtzs_dd_sat_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, d0
+	ret
+.Lfunc_end61:
+	.size	fcvtzs_dd_sat_simd, .Lfunc_end61-fcvtzs_dd_sat_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_sh_sat_simd              // -- Begin function fcvtzu_sh_sat_simd
+	.p2align	2
+	.type	fcvtzu_sh_sat_simd, at function
+fcvtzu_sh_sat_simd:                     // @fcvtzu_sh_sat_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	s0, h0
+	ret
+.Lfunc_end62:
+	.size	fcvtzu_sh_sat_simd, .Lfunc_end62-fcvtzu_sh_sat_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_dh_sat_simd              // -- Begin function fcvtzu_dh_sat_simd
+	.p2align	2
+	.type	fcvtzu_dh_sat_simd, at function
+fcvtzu_dh_sat_simd:                     // @fcvtzu_dh_sat_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, h0
+	ret
+.Lfunc_end63:
+	.size	fcvtzu_dh_sat_simd, .Lfunc_end63-fcvtzu_dh_sat_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_ds_sat_simd              // -- Begin function fcvtzu_ds_sat_simd
+	.p2align	2
+	.type	fcvtzu_ds_sat_simd, at function
+fcvtzu_ds_sat_simd:                     // @fcvtzu_ds_sat_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, s0
+	ret
+.Lfunc_end64:
+	.size	fcvtzu_ds_sat_simd, .Lfunc_end64-fcvtzu_ds_sat_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_sd_sat_simd              // -- Begin function fcvtzu_sd_sat_simd
+	.p2align	2
+	.type	fcvtzu_sd_sat_simd, at function
+fcvtzu_sd_sat_simd:                     // @fcvtzu_sd_sat_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	s0, d0
+	ret
+.Lfunc_end65:
+	.size	fcvtzu_sd_sat_simd, .Lfunc_end65-fcvtzu_sd_sat_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_ss_sat_simd              // -- Begin function fcvtzu_ss_sat_simd
+	.p2align	2
+	.type	fcvtzu_ss_sat_simd, at function
+fcvtzu_ss_sat_simd:                     // @fcvtzu_ss_sat_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, s0                          // NOTE(review): fptoui.sat should lower to fcvtzu (which saturates); signed fcvtzs here suggests the .ll calls fptosi.sat -- verify
+	ret
+.Lfunc_end66:
+	.size	fcvtzu_ss_sat_simd, .Lfunc_end66-fcvtzu_ss_sat_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_dd_sat_simd              // -- Begin function fcvtzu_dd_sat_simd
+	.p2align	2
+	.type	fcvtzu_dd_sat_simd, at function
+fcvtzu_dd_sat_simd:                     // @fcvtzu_dd_sat_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, d0                          // NOTE(review): unsigned-named sat test emits signed fcvtzs (cf. fcvtzu_ds_sat_simd, which emits fcvtzu) -- verify source intrinsic
+	ret
+.Lfunc_end67:
+	.size	fcvtzu_dd_sat_simd, .Lfunc_end67-fcvtzu_dd_sat_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtas_sh_simd                  // -- Begin function fcvtas_sh_simd
+	.p2align	2
+	.type	fcvtas_sh_simd, at function
+fcvtas_sh_simd:                         // @fcvtas_sh_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtas	s0, h0
+	ret
+.Lfunc_end68:
+	.size	fcvtas_sh_simd, .Lfunc_end68-fcvtas_sh_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtas_dh_simd                  // -- Begin function fcvtas_dh_simd
+	.p2align	2
+	.type	fcvtas_dh_simd, at function
+fcvtas_dh_simd:                         // @fcvtas_dh_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtas	d0, h0
+	ret
+.Lfunc_end69:
+	.size	fcvtas_dh_simd, .Lfunc_end69-fcvtas_dh_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtas_ds_simd                  // -- Begin function fcvtas_ds_simd
+	.p2align	2
+	.type	fcvtas_ds_simd, at function
+fcvtas_ds_simd:                         // @fcvtas_ds_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtas	d0, s0
+	ret
+.Lfunc_end70:
+	.size	fcvtas_ds_simd, .Lfunc_end70-fcvtas_ds_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtas_sd_simd                  // -- Begin function fcvtas_sd_simd
+	.p2align	2
+	.type	fcvtas_sd_simd, at function
+fcvtas_sd_simd:                         // @fcvtas_sd_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtas	s0, d0
+	ret
+.Lfunc_end71:
+	.size	fcvtas_sd_simd, .Lfunc_end71-fcvtas_sd_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtas_ss_simd                  // -- Begin function fcvtas_ss_simd
+	.p2align	2
+	.type	fcvtas_ss_simd, at function
+fcvtas_ss_simd:                         // @fcvtas_ss_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtas	s0, s0
+	ret
+.Lfunc_end72:
+	.size	fcvtas_ss_simd, .Lfunc_end72-fcvtas_ss_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtas_dd_simd                  // -- Begin function fcvtas_dd_simd
+	.p2align	2
+	.type	fcvtas_dd_simd, at function
+fcvtas_dd_simd:                         // @fcvtas_dd_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtas	d0, d0
+	ret
+.Lfunc_end73:
+	.size	fcvtas_dd_simd, .Lfunc_end73-fcvtas_dd_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtau_sh_simd                  // -- Begin function fcvtau_sh_simd
+	.p2align	2
+	.type	fcvtau_sh_simd, at function
+fcvtau_sh_simd:                         // @fcvtau_sh_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtau	s0, h0
+	ret
+.Lfunc_end74:
+	.size	fcvtau_sh_simd, .Lfunc_end74-fcvtau_sh_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtau_dh_simd                  // -- Begin function fcvtau_dh_simd
+	.p2align	2
+	.type	fcvtau_dh_simd, at function
+fcvtau_dh_simd:                         // @fcvtau_dh_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtau	d0, h0
+	ret
+.Lfunc_end75:
+	.size	fcvtau_dh_simd, .Lfunc_end75-fcvtau_dh_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtau_ds_simd                  // -- Begin function fcvtau_ds_simd
+	.p2align	2
+	.type	fcvtau_ds_simd, at function
+fcvtau_ds_simd:                         // @fcvtau_ds_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtau	d0, s0
+	ret
+.Lfunc_end76:
+	.size	fcvtau_ds_simd, .Lfunc_end76-fcvtau_ds_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtau_sd_simd                  // -- Begin function fcvtau_sd_simd
+	.p2align	2
+	.type	fcvtau_sd_simd, at function
+fcvtau_sd_simd:                         // @fcvtau_sd_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtau	s0, d0
+	ret
+.Lfunc_end77:
+	.size	fcvtau_sd_simd, .Lfunc_end77-fcvtau_sd_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtau_ss_simd                  // -- Begin function fcvtau_ss_simd
+	.p2align	2
+	.type	fcvtau_ss_simd, at function
+fcvtau_ss_simd:                         // @fcvtau_ss_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtas	s0, s0                          // NOTE(review): fcvtau test emits signed fcvtas (cf. fcvtau_ds_simd, which emits fcvtau) -- confirm the .ll calls the fcvtau intrinsic
+	ret
+.Lfunc_end78:
+	.size	fcvtau_ss_simd, .Lfunc_end78-fcvtau_ss_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtau_dd_simd                  // -- Begin function fcvtau_dd_simd
+	.p2align	2
+	.type	fcvtau_dd_simd, at function
+fcvtau_dd_simd:                         // @fcvtau_dd_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtas	d0, d0                          // NOTE(review): fcvtau test emits signed fcvtas -- confirm the .ll calls the fcvtau intrinsic
+	ret
+.Lfunc_end79:
+	.size	fcvtau_dd_simd, .Lfunc_end79-fcvtau_dd_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtms_sh_simd                  // -- Begin function fcvtms_sh_simd
+	.p2align	2
+	.type	fcvtms_sh_simd, at function
+fcvtms_sh_simd:                         // @fcvtms_sh_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtms	s0, h0
+	ret
+.Lfunc_end80:
+	.size	fcvtms_sh_simd, .Lfunc_end80-fcvtms_sh_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtms_dh_simd                  // -- Begin function fcvtms_dh_simd
+	.p2align	2
+	.type	fcvtms_dh_simd, at function
+fcvtms_dh_simd:                         // @fcvtms_dh_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtms	d0, h0
+	ret
+.Lfunc_end81:
+	.size	fcvtms_dh_simd, .Lfunc_end81-fcvtms_dh_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtms_ds_simd                  // -- Begin function fcvtms_ds_simd
+	.p2align	2
+	.type	fcvtms_ds_simd, at function
+fcvtms_ds_simd:                         // @fcvtms_ds_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtms	d0, s0
+	ret
+.Lfunc_end82:
+	.size	fcvtms_ds_simd, .Lfunc_end82-fcvtms_ds_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtms_sd_simd                  // -- Begin function fcvtms_sd_simd
+	.p2align	2
+	.type	fcvtms_sd_simd, at function
+fcvtms_sd_simd:                         // @fcvtms_sd_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtms	s0, d0
+	ret
+.Lfunc_end83:
+	.size	fcvtms_sd_simd, .Lfunc_end83-fcvtms_sd_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtms_ss_simd                  // -- Begin function fcvtms_ss_simd
+	.p2align	2
+	.type	fcvtms_ss_simd, at function
+fcvtms_ss_simd:                         // @fcvtms_ss_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtms	s0, s0
+	ret
+.Lfunc_end84:
+	.size	fcvtms_ss_simd, .Lfunc_end84-fcvtms_ss_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtms_dd_simd                  // -- Begin function fcvtms_dd_simd
+	.p2align	2
+	.type	fcvtms_dd_simd, at function
+fcvtms_dd_simd:                         // @fcvtms_dd_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtms	d0, d0
+	ret
+.Lfunc_end85:
+	.size	fcvtms_dd_simd, .Lfunc_end85-fcvtms_dd_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtmu_sh_simd                  // -- Begin function fcvtmu_sh_simd
+	.p2align	2
+	.type	fcvtmu_sh_simd, at function
+fcvtmu_sh_simd:                         // @fcvtmu_sh_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtmu	s0, h0
+	ret
+.Lfunc_end86:
+	.size	fcvtmu_sh_simd, .Lfunc_end86-fcvtmu_sh_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtmu_dh_simd                  // -- Begin function fcvtmu_dh_simd
+	.p2align	2
+	.type	fcvtmu_dh_simd, at function
+fcvtmu_dh_simd:                         // @fcvtmu_dh_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtmu	d0, h0
+	ret
+.Lfunc_end87:
+	.size	fcvtmu_dh_simd, .Lfunc_end87-fcvtmu_dh_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtmu_ds_simd                  // -- Begin function fcvtmu_ds_simd
+	.p2align	2
+	.type	fcvtmu_ds_simd, at function
+fcvtmu_ds_simd:                         // @fcvtmu_ds_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtmu	d0, s0
+	ret
+.Lfunc_end88:
+	.size	fcvtmu_ds_simd, .Lfunc_end88-fcvtmu_ds_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtmu_sd_simd                  // -- Begin function fcvtmu_sd_simd
+	.p2align	2
+	.type	fcvtmu_sd_simd, at function
+fcvtmu_sd_simd:                         // @fcvtmu_sd_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtmu	s0, d0
+	ret
+.Lfunc_end89:
+	.size	fcvtmu_sd_simd, .Lfunc_end89-fcvtmu_sd_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtmu_ss_simd                  // -- Begin function fcvtmu_ss_simd
+	.p2align	2
+	.type	fcvtmu_ss_simd, at function
+fcvtmu_ss_simd:                         // @fcvtmu_ss_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtms	s0, s0                          // NOTE(review): fcvtmu test emits signed fcvtms (cf. fcvtmu_ds_simd, which emits fcvtmu) -- confirm the .ll calls the fcvtmu intrinsic
+	ret
+.Lfunc_end90:
+	.size	fcvtmu_ss_simd, .Lfunc_end90-fcvtmu_ss_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtmu_dd_simd                  // -- Begin function fcvtmu_dd_simd
+	.p2align	2
+	.type	fcvtmu_dd_simd, at function
+fcvtmu_dd_simd:                         // @fcvtmu_dd_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtms	d0, d0                          // NOTE(review): fcvtmu test emits signed fcvtms -- confirm the .ll calls the fcvtmu intrinsic
+	ret
+.Lfunc_end91:
+	.size	fcvtmu_dd_simd, .Lfunc_end91-fcvtmu_dd_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtps_sh_simd                  // -- Begin function fcvtps_sh_simd
+	.p2align	2
+	.type	fcvtps_sh_simd, at function
+fcvtps_sh_simd:                         // @fcvtps_sh_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtps	s0, h0
+	ret
+.Lfunc_end92:
+	.size	fcvtps_sh_simd, .Lfunc_end92-fcvtps_sh_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtps_dh_simd                  // -- Begin function fcvtps_dh_simd
+	.p2align	2
+	.type	fcvtps_dh_simd, at function
+fcvtps_dh_simd:                         // @fcvtps_dh_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtps	d0, h0
+	ret
+.Lfunc_end93:
+	.size	fcvtps_dh_simd, .Lfunc_end93-fcvtps_dh_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtps_ds_simd                  // -- Begin function fcvtps_ds_simd
+	.p2align	2
+	.type	fcvtps_ds_simd, at function
+fcvtps_ds_simd:                         // @fcvtps_ds_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtps	d0, s0
+	ret
+.Lfunc_end94:
+	.size	fcvtps_ds_simd, .Lfunc_end94-fcvtps_ds_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtps_sd_simd                  // -- Begin function fcvtps_sd_simd
+	.p2align	2
+	.type	fcvtps_sd_simd, at function
+fcvtps_sd_simd:                         // @fcvtps_sd_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtps	s0, d0
+	ret
+.Lfunc_end95:
+	.size	fcvtps_sd_simd, .Lfunc_end95-fcvtps_sd_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtps_ss_simd                  // -- Begin function fcvtps_ss_simd
+	.p2align	2
+	.type	fcvtps_ss_simd, at function
+fcvtps_ss_simd:                         // @fcvtps_ss_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtps	s0, s0
+	ret
+.Lfunc_end96:
+	.size	fcvtps_ss_simd, .Lfunc_end96-fcvtps_ss_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtps_dd_simd                  // -- Begin function fcvtps_dd_simd
+	.p2align	2
+	.type	fcvtps_dd_simd, at function
+fcvtps_dd_simd:                         // @fcvtps_dd_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtps	d0, d0
+	ret
+.Lfunc_end97:
+	.size	fcvtps_dd_simd, .Lfunc_end97-fcvtps_dd_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtpu_sh_simd                  // -- Begin function fcvtpu_sh_simd
+	.p2align	2
+	.type	fcvtpu_sh_simd, at function
+fcvtpu_sh_simd:                         // @fcvtpu_sh_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtpu	s0, h0
+	ret
+.Lfunc_end98:
+	.size	fcvtpu_sh_simd, .Lfunc_end98-fcvtpu_sh_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtpu_dh_simd                  // -- Begin function fcvtpu_dh_simd
+	.p2align	2
+	.type	fcvtpu_dh_simd, at function
+fcvtpu_dh_simd:                         // @fcvtpu_dh_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtpu	d0, h0
+	ret
+.Lfunc_end99:
+	.size	fcvtpu_dh_simd, .Lfunc_end99-fcvtpu_dh_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtpu_ds_simd                  // -- Begin function fcvtpu_ds_simd
+	.p2align	2
+	.type	fcvtpu_ds_simd, at function
+fcvtpu_ds_simd:                         // @fcvtpu_ds_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtpu	d0, s0
+	ret
+.Lfunc_end100:
+	.size	fcvtpu_ds_simd, .Lfunc_end100-fcvtpu_ds_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtpu_sd_simd                  // -- Begin function fcvtpu_sd_simd
+	.p2align	2
+	.type	fcvtpu_sd_simd, at function
+fcvtpu_sd_simd:                         // @fcvtpu_sd_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtpu	s0, d0
+	ret
+.Lfunc_end101:
+	.size	fcvtpu_sd_simd, .Lfunc_end101-fcvtpu_sd_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtpu_ss_simd                  // -- Begin function fcvtpu_ss_simd
+	.p2align	2
+	.type	fcvtpu_ss_simd, at function
+fcvtpu_ss_simd:                         // @fcvtpu_ss_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtps	s0, s0                          // NOTE(review): fcvtpu test emits signed fcvtps (cf. fcvtpu_ds_simd, which emits fcvtpu) -- confirm the .ll calls the fcvtpu intrinsic
+	ret
+.Lfunc_end102:
+	.size	fcvtpu_ss_simd, .Lfunc_end102-fcvtpu_ss_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtpu_dd_simd                  // -- Begin function fcvtpu_dd_simd
+	.p2align	2
+	.type	fcvtpu_dd_simd, at function
+fcvtpu_dd_simd:                         // @fcvtpu_dd_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtps	d0, d0                          // NOTE(review): fcvtpu test emits signed fcvtps -- confirm the .ll calls the fcvtpu intrinsic
+	ret
+.Lfunc_end103:
+	.size	fcvtpu_dd_simd, .Lfunc_end103-fcvtpu_dd_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_sh_simd                  // -- Begin function fcvtzs_sh_simd
+	.p2align	2
+	.type	fcvtzs_sh_simd, at function
+fcvtzs_sh_simd:                         // @fcvtzs_sh_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, h0
+	ret
+.Lfunc_end104:
+	.size	fcvtzs_sh_simd, .Lfunc_end104-fcvtzs_sh_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_dh_simd                  // -- Begin function fcvtzs_dh_simd
+	.p2align	2
+	.type	fcvtzs_dh_simd, at function
+fcvtzs_dh_simd:                         // @fcvtzs_dh_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, h0
+	ret
+.Lfunc_end105:
+	.size	fcvtzs_dh_simd, .Lfunc_end105-fcvtzs_dh_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_ds_simd                  // -- Begin function fcvtzs_ds_simd
+	.p2align	2
+	.type	fcvtzs_ds_simd, at function
+fcvtzs_ds_simd:                         // @fcvtzs_ds_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, s0
+	ret
+.Lfunc_end106:
+	.size	fcvtzs_ds_simd, .Lfunc_end106-fcvtzs_ds_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_sd_simd                  // -- Begin function fcvtzs_sd_simd
+	.p2align	2
+	.type	fcvtzs_sd_simd, at function
+fcvtzs_sd_simd:                         // @fcvtzs_sd_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, d0
+	ret
+.Lfunc_end107:
+	.size	fcvtzs_sd_simd, .Lfunc_end107-fcvtzs_sd_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_ss_simd                  // -- Begin function fcvtzs_ss_simd
+	.p2align	2
+	.type	fcvtzs_ss_simd, at function
+fcvtzs_ss_simd:                         // @fcvtzs_ss_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, s0
+	ret
+.Lfunc_end108:
+	.size	fcvtzs_ss_simd, .Lfunc_end108-fcvtzs_ss_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_dd_simd                  // -- Begin function fcvtzs_dd_simd
+	.p2align	2
+	.type	fcvtzs_dd_simd, at function
+fcvtzs_dd_simd:                         // @fcvtzs_dd_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, d0
+	ret
+.Lfunc_end109:
+	.size	fcvtzs_dd_simd, .Lfunc_end109-fcvtzs_dd_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_sh_simd                  // -- Begin function fcvtzu_sh_simd
+	.p2align	2
+	.type	fcvtzu_sh_simd, at function
+fcvtzu_sh_simd:                         // @fcvtzu_sh_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	s0, h0
+	ret
+.Lfunc_end110:
+	.size	fcvtzu_sh_simd, .Lfunc_end110-fcvtzu_sh_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_dh_simd                  // -- Begin function fcvtzu_dh_simd
+	.p2align	2
+	.type	fcvtzu_dh_simd, at function
+fcvtzu_dh_simd:                         // @fcvtzu_dh_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, h0
+	ret
+.Lfunc_end111:
+	.size	fcvtzu_dh_simd, .Lfunc_end111-fcvtzu_dh_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_ds_simd                  // -- Begin function fcvtzu_ds_simd
+	.p2align	2
+	.type	fcvtzu_ds_simd, at function
+fcvtzu_ds_simd:                         // @fcvtzu_ds_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, s0
+	ret
+.Lfunc_end112:
+	.size	fcvtzu_ds_simd, .Lfunc_end112-fcvtzu_ds_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_sd_simd                  // -- Begin function fcvtzu_sd_simd
+	.p2align	2
+	.type	fcvtzu_sd_simd, at function
+fcvtzu_sd_simd:                         // @fcvtzu_sd_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	s0, d0
+	ret
+.Lfunc_end113:
+	.size	fcvtzu_sd_simd, .Lfunc_end113-fcvtzu_sd_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_ss_simd                  // -- Begin function fcvtzu_ss_simd
+	.p2align	2
+	.type	fcvtzu_ss_simd, at function
+fcvtzu_ss_simd:                         // @fcvtzu_ss_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	s0, s0
+	ret
+.Lfunc_end114:
+	.size	fcvtzu_ss_simd, .Lfunc_end114-fcvtzu_ss_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_dd_simd                  // -- Begin function fcvtzu_dd_simd
+	.p2align	2
+	.type	fcvtzu_dd_simd, at function
+fcvtzu_dd_simd:                         // @fcvtzu_dd_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, d0
+	ret
+.Lfunc_end115:
+	.size	fcvtzu_dd_simd, .Lfunc_end115-fcvtzu_dd_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_scalar_to_vector_h       // -- Begin function fcvtzs_scalar_to_vector_h
+	.p2align	2
+	.type	fcvtzs_scalar_to_vector_h, at function
+fcvtzs_scalar_to_vector_h:              // @fcvtzs_scalar_to_vector_h
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, h0
+	ret
+.Lfunc_end116:
+	.size	fcvtzs_scalar_to_vector_h, .Lfunc_end116-fcvtzs_scalar_to_vector_h
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_scalar_to_vector_s       // -- Begin function fcvtzs_scalar_to_vector_s
+	.p2align	2
+	.type	fcvtzs_scalar_to_vector_s, at function
+fcvtzs_scalar_to_vector_s:              // @fcvtzs_scalar_to_vector_s
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, s0
+	ret
+.Lfunc_end117:
+	.size	fcvtzs_scalar_to_vector_s, .Lfunc_end117-fcvtzs_scalar_to_vector_s
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_scalar_to_vector_d       // -- Begin function fcvtzs_scalar_to_vector_d
+	.p2align	2
+	.type	fcvtzs_scalar_to_vector_d, at function
+fcvtzs_scalar_to_vector_d:              // @fcvtzs_scalar_to_vector_d
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, d0
+	ret
+.Lfunc_end118:
+	.size	fcvtzs_scalar_to_vector_d, .Lfunc_end118-fcvtzs_scalar_to_vector_d
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_scalar_to_vector_h       // -- Begin function fcvtzu_scalar_to_vector_h
+	.p2align	2
+	.type	fcvtzu_scalar_to_vector_h, at function
+fcvtzu_scalar_to_vector_h:              // @fcvtzu_scalar_to_vector_h
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, h0
+	ret
+.Lfunc_end119:
+	.size	fcvtzu_scalar_to_vector_h, .Lfunc_end119-fcvtzu_scalar_to_vector_h
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_scalar_to_vector_s       // -- Begin function fcvtzu_scalar_to_vector_s
+	.p2align	2
+	.type	fcvtzu_scalar_to_vector_s, at function
+fcvtzu_scalar_to_vector_s:              // @fcvtzu_scalar_to_vector_s
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, s0
+	ret
+.Lfunc_end120:
+	.size	fcvtzu_scalar_to_vector_s, .Lfunc_end120-fcvtzu_scalar_to_vector_s
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_scalar_to_vector_d       // -- Begin function fcvtzu_scalar_to_vector_d
+	.p2align	2
+	.type	fcvtzu_scalar_to_vector_d, at function
+fcvtzu_scalar_to_vector_d:              // @fcvtzu_scalar_to_vector_d
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, d0
+	ret
+.Lfunc_end121:
+	.size	fcvtzu_scalar_to_vector_d, .Lfunc_end121-fcvtzu_scalar_to_vector_d
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_scalar_to_vector_h_strict // -- Begin function fcvtzs_scalar_to_vector_h_strict
+	.p2align	2
+	.type	fcvtzs_scalar_to_vector_h_strict, at function
+fcvtzs_scalar_to_vector_h_strict:       // @fcvtzs_scalar_to_vector_h_strict
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, h0
+	ret
+.Lfunc_end122:
+	.size	fcvtzs_scalar_to_vector_h_strict, .Lfunc_end122-fcvtzs_scalar_to_vector_h_strict
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzs_scalar_to_vector_s_strict // -- Begin function fcvtzs_scalar_to_vector_s_strict
+	.p2align	2
+	.type	fcvtzs_scalar_to_vector_s_strict, at function
+fcvtzs_scalar_to_vector_s_strict:       // @fcvtzs_scalar_to_vector_s_strict
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, s0
+	ret
+.Lfunc_end123:
+	.size	fcvtzs_scalar_to_vector_s_strict, .Lfunc_end123-fcvtzs_scalar_to_vector_s_strict
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_scalar_to_vector_h_strict // -- Begin function fcvtzu_scalar_to_vector_h_strict
+	.p2align	2
+	.type	fcvtzu_scalar_to_vector_h_strict, at function
+fcvtzu_scalar_to_vector_h_strict:       // @fcvtzu_scalar_to_vector_h_strict
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, h0
+	ret
+.Lfunc_end124:
+	.size	fcvtzu_scalar_to_vector_h_strict, .Lfunc_end124-fcvtzu_scalar_to_vector_h_strict
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtzu_scalar_to_vector_s_strict // -- Begin function fcvtzu_scalar_to_vector_s_strict
+	.p2align	2
+	.type	fcvtzu_scalar_to_vector_s_strict, at function
+fcvtzu_scalar_to_vector_s_strict:       // @fcvtzu_scalar_to_vector_s_strict
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, s0
+	ret
+.Lfunc_end125:
+	.size	fcvtzu_scalar_to_vector_s_strict, .Lfunc_end125-fcvtzu_scalar_to_vector_s_strict
+	.cfi_endproc
+                                        // -- End function
+	.section	".note.GNU-stack","", at progbits
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
index b1b9fcf8a8b3c..8b8f23a049107 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
@@ -4,7 +4,7 @@
 
 
 ;
-; Intriniscs
+; Intrinsics (bitcast)
 ;
 
 define float @fcvtas_1s1d_simd(double %A) nounwind {
@@ -607,3 +607,335 @@ define  float @fcvtzu_1s1s_simd(float %a) {
   %d = bitcast i32 %vcvtah_s32_f32 to float
   ret float %d
 }
+
+;
+; Intrinsics (scalar_to_vector)
+;
+
+define <1 x i64> @fcvtas_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtas_1d1s_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtas.i64.f32(float %A)
+  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+  ret <1 x i64> %vec
+}
+
+
+define  <1 x i64> @fcvtas_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtas_1d1h_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f16(half %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+  ret <1 x i64> %vec
+}
+
+define  <1 x i64> @fcvtas_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtas_1d1d_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+  ret <1 x i64> %vec
+}
+
+
+
+define <1 x i64> @fcvtau_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtau_1d1s_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtau.i64.f32(float %A)
+  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+  ret <1 x i64> %vec
+}
+
+
+define  <1 x i64> @fcvtau_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtau_1d1h_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f16(half %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+  ret <1 x i64> %vec
+}
+
+define  <1 x i64> @fcvtau_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtau_1d1d_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+  ret <1 x i64> %vec
+}
+
+
+
+define <1 x i64> @fcvtms_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtms_1d1s_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtms.i64.f32(float %A)
+  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+  ret <1 x i64> %vec
+}
+
+
+define  <1 x i64> @fcvtms_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtms_1d1h_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+  ret <1 x i64> %vec
+}
+
+define  <1 x i64> @fcvtms_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtms_1d1d_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+  ret <1 x i64> %vec
+}
+
+
+
+define <1 x i64> @fcvtmu_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtmu_1d1s_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtmu.i64.f32(float %A)
+  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+  ret <1 x i64> %vec
+}
+
+
+define  <1 x i64> @fcvtmu_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtmu_1d1h_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+  ret <1 x i64> %vec
+}
+
+define  <1 x i64> @fcvtmu_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtmu_1d1d_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+  ret <1 x i64> %vec
+}
+
+
+
+define <1 x i64> @fcvtns_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtns_1d1s_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtns d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtns.i64.f32(float %A)
+  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+  ret <1 x i64> %vec
+}
+
+
+define  <1 x i64> @fcvtns_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtns_1d1h_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtns d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f16(half %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+  ret <1 x i64> %vec
+}
+
+define  <1 x i64> @fcvtns_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtns_1d1d_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtns d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+  ret <1 x i64> %vec
+}
+
+
+
+define <1 x i64> @fcvtnu_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtnu_1d1s_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnu d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtnu.i64.f32(float %A)
+  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+  ret <1 x i64> %vec
+}
+
+
+define  <1 x i64> @fcvtnu_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtnu_1d1h_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnu d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+  ret <1 x i64> %vec
+}
+
+define  <1 x i64> @fcvtnu_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtnu_1d1d_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnu d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+  ret <1 x i64> %vec
+}
+
+
+
+define <1 x i64> @fcvtps_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtps_1d1s_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtps.i64.f32(float %A)
+  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+  ret <1 x i64> %vec
+}
+
+
+define  <1 x i64> @fcvtps_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtps_1d1h_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f16(half %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+  ret <1 x i64> %vec
+}
+
+define  <1 x i64> @fcvtps_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtps_1d1d_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+  ret <1 x i64> %vec
+}
+
+
+
+define <1 x i64> @fcvtpu_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtpu_1d1s_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtpu.i64.f32(float %A)
+  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+  ret <1 x i64> %vec
+}
+
+
+define  <1 x i64> @fcvtpu_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtpu_1d1h_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+  ret <1 x i64> %vec
+}
+
+define  <1 x i64> @fcvtpu_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtpu_1d1d_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+  ret <1 x i64> %vec
+}
+
+
+
+define <1 x i64> @fcvtzs_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzs_1d1s_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %A)
+  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+  ret <1 x i64> %vec
+}
+
+
+define  <1 x i64> @fcvtzs_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzs_1d1h_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+  ret <1 x i64> %vec
+}
+
+define  <1 x i64> @fcvtzs_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzs_1d1d_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+  ret <1 x i64> %vec
+}
+
+
+
+define <1 x i64> @fcvtzu_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzu_1d1s_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float %A)
+  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+  ret <1 x i64> %vec
+}
+
+
+define  <1 x i64> @fcvtzu_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzu_1d1h_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, h0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+  ret <1 x i64> %vec
+}
+
+define  <1 x i64> @fcvtzu_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzu_1d1d_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, d0
+; CHECK-NEXT:    ret
+  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double %a)
+  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+  ret <1 x i64> %vec
+}
diff --git a/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll b/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
index b580c4921fb66..35f62e52ffd76 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
@@ -21,8 +21,7 @@ define double @bar(ptr %iVals, ptr %fVals, ptr %dVals) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr d0, [x2, #128]
 ; CHECK-NEXT:    frinti d0, d0
-; CHECK-NEXT:    fcvtzs x8, d0
-; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    fcvtzs d0, d0
 ; CHECK-NEXT:    sri d0, d0, #1
 ; CHECK-NEXT:    scvtf.2d v0, v0, #1
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
index d8f370884c84a..c2f39fb14ee24 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -980,18 +980,11 @@ define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
 }
 
 define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
-; CHECK-SD-LABEL: test_bitcastv8i8tov1f64:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    neg v0.8b, v0.8b
-; CHECK-SD-NEXT:    fcvtzs x8, d0
-; CHECK-SD-NEXT:    fmov d0, x8
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: test_bitcastv8i8tov1f64:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    neg v0.8b, v0.8b
-; CHECK-GI-NEXT:    fcvtzs d0, d0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: test_bitcastv8i8tov1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg v0.8b, v0.8b
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
   %sub.i = sub <8 x i8> zeroinitializer, %a
   %1 = bitcast <8 x i8> %sub.i to <1 x double>
   %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
@@ -999,18 +992,11 @@ define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
 }
 
 define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
-; CHECK-SD-LABEL: test_bitcastv4i16tov1f64:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    neg v0.4h, v0.4h
-; CHECK-SD-NEXT:    fcvtzs x8, d0
-; CHECK-SD-NEXT:    fmov d0, x8
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: test_bitcastv4i16tov1f64:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    neg v0.4h, v0.4h
-; CHECK-GI-NEXT:    fcvtzs d0, d0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: test_bitcastv4i16tov1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg v0.4h, v0.4h
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
   %sub.i = sub <4 x i16> zeroinitializer, %a
   %1 = bitcast <4 x i16> %sub.i to <1 x double>
   %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
@@ -1018,18 +1004,11 @@ define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
 }
 
 define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
-; CHECK-SD-LABEL: test_bitcastv2i32tov1f64:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    neg v0.2s, v0.2s
-; CHECK-SD-NEXT:    fcvtzs x8, d0
-; CHECK-SD-NEXT:    fmov d0, x8
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: test_bitcastv2i32tov1f64:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    neg v0.2s, v0.2s
-; CHECK-GI-NEXT:    fcvtzs d0, d0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: test_bitcastv2i32tov1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg v0.2s, v0.2s
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
   %sub.i = sub <2 x i32> zeroinitializer, %a
   %1 = bitcast <2 x i32> %sub.i to <1 x double>
   %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
@@ -1040,8 +1019,7 @@ define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
 ; CHECK-SD-LABEL: test_bitcastv1i64tov1f64:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    neg d0, d0
-; CHECK-SD-NEXT:    fcvtzs x8, d0
-; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    fcvtzs d0, d0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_bitcastv1i64tov1f64:
@@ -1061,8 +1039,7 @@ define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
 ; CHECK-LABEL: test_bitcastv2f32tov1f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fneg v0.2s, v0.2s
-; CHECK-NEXT:    fcvtzs x8, d0
-; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    fcvtzs d0, d0
 ; CHECK-NEXT:    ret
   %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
   %1 = bitcast <2 x float> %sub.i to <1 x double>
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
index 1e0cfa0201263..dcb3b9b24627b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -359,16 +359,10 @@ define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind {
 
 ; FIXME: Generate "fcvtzs d0, d0"?
 define <1 x i64> @fcvtzs_1d(<1 x double> %A) nounwind {
-; CHECK-SD-LABEL: fcvtzs_1d:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzs x8, d0
-; CHECK-SD-NEXT:    fmov d0, x8
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fcvtzs_1d:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcvtzs d0, d0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fcvtzs_1d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
 	%tmp3 = fptosi <1 x double> %A to <1 x i64>
 	ret <1 x i64> %tmp3
 }
@@ -443,16 +437,10 @@ define <2 x i64> @fcvtzu_2d(<2 x double> %A) nounwind {
 
 ; FIXME: Generate "fcvtzu d0, d0"?
 define <1 x i64> @fcvtzu_1d(<1 x double> %A) nounwind {
-; CHECK-SD-LABEL: fcvtzu_1d:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcvtzu x8, d0
-; CHECK-SD-NEXT:    fmov d0, x8
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fcvtzu_1d:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcvtzu d0, d0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fcvtzu_1d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, d0
+; CHECK-NEXT:    ret
 	%tmp3 = fptoui <1 x double> %A to <1 x i64>
 	ret <1 x i64> %tmp3
 }
diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
index c3da22757f1d2..0b05e00a1b0db 100644
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
@@ -717,8 +717,7 @@ define <1 x i32> @fptoui_v1i32_v1f64(<1 x double> %x) #0 {
 define <1 x i64> @fptosi_v1i64_v1f64(<1 x double> %x) #0 {
 ; CHECK-LABEL: fptosi_v1i64_v1f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs x8, d0
-; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    fcvtzs d0, d0
 ; CHECK-NEXT:    ret
   %val = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
   ret <1 x i64> %val
@@ -727,8 +726,7 @@ define <1 x i64> @fptosi_v1i64_v1f64(<1 x double> %x) #0 {
 define <1 x i64> @fptoui_v1i64_v1f64(<1 x double> %x) #0 {
 ; CHECK-LABEL: fptoui_v1i64_v1f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu x8, d0
-; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    fcvtzu d0, d0
 ; CHECK-NEXT:    ret
   %val = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
   ret <1 x i64> %val
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
index c8f6d98f5a63f..312d158cfb2b6 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
@@ -815,8 +815,7 @@ define void @fcvtzu_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x i64> @fcvtzu_v1f64_v1i64(<1 x double> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzu_v1f64_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu x8, d0
-; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    fcvtzu d0, d0
 ; CHECK-NEXT:    ret
   %res = fptoui <1 x double> %op1 to <1 x i64>
   ret <1 x i64> %res
@@ -1710,8 +1709,7 @@ define void @fcvtzs_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x i64> @fcvtzs_v1f64_v1i64(<1 x double> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzs_v1f64_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs x8, d0
-; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    fcvtzs d0, d0
 ; CHECK-NEXT:    ret
   %res = fptosi <1 x double> %op1 to <1 x i64>
   ret <1 x i64> %res

>From 0927e44a9c2247935ae10e4e368a4be6f8aae8c0 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Thu, 18 Dec 2025 13:05:46 +0000
Subject: [PATCH 2/8] Fix accidentally committed assembly file and comment typo

---
 .../CodeGen/AArch64/arm64-cvt-simd-fptoi.s    | 1515 -----------------
 .../AArch64/arm64-cvt-simd-intrinsics.ll      |    2 +-
 2 files changed, 1 insertion(+), 1516 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s

diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s
deleted file mode 100644
index 0850b306e8c79..0000000000000
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s
+++ /dev/null
@@ -1,1515 +0,0 @@
-	.file	"arm64-cvt-simd-fptoi.ll"
-	.text
-	.globl	test_fptosi_f16_i32_simd        // -- Begin function test_fptosi_f16_i32_simd
-	.p2align	2
-	.type	test_fptosi_f16_i32_simd, at function
-test_fptosi_f16_i32_simd:               // @test_fptosi_f16_i32_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	s0, h0
-	ret
-.Lfunc_end0:
-	.size	test_fptosi_f16_i32_simd, .Lfunc_end0-test_fptosi_f16_i32_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	test_fptosi_f16_i64_simd        // -- Begin function test_fptosi_f16_i64_simd
-	.p2align	2
-	.type	test_fptosi_f16_i64_simd, at function
-test_fptosi_f16_i64_simd:               // @test_fptosi_f16_i64_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, h0
-	ret
-.Lfunc_end1:
-	.size	test_fptosi_f16_i64_simd, .Lfunc_end1-test_fptosi_f16_i64_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	test_fptosi_f64_i32_simd        // -- Begin function test_fptosi_f64_i32_simd
-	.p2align	2
-	.type	test_fptosi_f64_i32_simd, at function
-test_fptosi_f64_i32_simd:               // @test_fptosi_f64_i32_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	s0, d0
-	ret
-.Lfunc_end2:
-	.size	test_fptosi_f64_i32_simd, .Lfunc_end2-test_fptosi_f64_i32_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	test_fptosi_f32_i64_simd        // -- Begin function test_fptosi_f32_i64_simd
-	.p2align	2
-	.type	test_fptosi_f32_i64_simd, at function
-test_fptosi_f32_i64_simd:               // @test_fptosi_f32_i64_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, s0
-	ret
-.Lfunc_end3:
-	.size	test_fptosi_f32_i64_simd, .Lfunc_end3-test_fptosi_f32_i64_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	test_fptosi_f64_i64_simd        // -- Begin function test_fptosi_f64_i64_simd
-	.p2align	2
-	.type	test_fptosi_f64_i64_simd, at function
-test_fptosi_f64_i64_simd:               // @test_fptosi_f64_i64_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, d0
-	ret
-.Lfunc_end4:
-	.size	test_fptosi_f64_i64_simd, .Lfunc_end4-test_fptosi_f64_i64_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	test_fptosi_f32_i32_simd        // -- Begin function test_fptosi_f32_i32_simd
-	.p2align	2
-	.type	test_fptosi_f32_i32_simd, at function
-test_fptosi_f32_i32_simd:               // @test_fptosi_f32_i32_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	s0, s0
-	ret
-.Lfunc_end5:
-	.size	test_fptosi_f32_i32_simd, .Lfunc_end5-test_fptosi_f32_i32_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	test_fptoui_f16_i32_simd        // -- Begin function test_fptoui_f16_i32_simd
-	.p2align	2
-	.type	test_fptoui_f16_i32_simd, at function
-test_fptoui_f16_i32_simd:               // @test_fptoui_f16_i32_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	s0, h0
-	ret
-.Lfunc_end6:
-	.size	test_fptoui_f16_i32_simd, .Lfunc_end6-test_fptoui_f16_i32_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	test_fptoui_f16_i64_simd        // -- Begin function test_fptoui_f16_i64_simd
-	.p2align	2
-	.type	test_fptoui_f16_i64_simd, at function
-test_fptoui_f16_i64_simd:               // @test_fptoui_f16_i64_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	d0, h0
-	ret
-.Lfunc_end7:
-	.size	test_fptoui_f16_i64_simd, .Lfunc_end7-test_fptoui_f16_i64_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	test_fptoui_f64_i32_simd        // -- Begin function test_fptoui_f64_i32_simd
-	.p2align	2
-	.type	test_fptoui_f64_i32_simd, at function
-test_fptoui_f64_i32_simd:               // @test_fptoui_f64_i32_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	s0, d0
-	ret
-.Lfunc_end8:
-	.size	test_fptoui_f64_i32_simd, .Lfunc_end8-test_fptoui_f64_i32_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	test_fptoui_f32_i64_simd        // -- Begin function test_fptoui_f32_i64_simd
-	.p2align	2
-	.type	test_fptoui_f32_i64_simd, at function
-test_fptoui_f32_i64_simd:               // @test_fptoui_f32_i64_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	d0, s0
-	ret
-.Lfunc_end9:
-	.size	test_fptoui_f32_i64_simd, .Lfunc_end9-test_fptoui_f32_i64_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	test_fptoui_f64_i64_simd        // -- Begin function test_fptoui_f64_i64_simd
-	.p2align	2
-	.type	test_fptoui_f64_i64_simd, at function
-test_fptoui_f64_i64_simd:               // @test_fptoui_f64_i64_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	d0, d0
-	ret
-.Lfunc_end10:
-	.size	test_fptoui_f64_i64_simd, .Lfunc_end10-test_fptoui_f64_i64_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	test_fptoui_f32_i32_simd        // -- Begin function test_fptoui_f32_i32_simd
-	.p2align	2
-	.type	test_fptoui_f32_i32_simd, at function
-test_fptoui_f32_i32_simd:               // @test_fptoui_f32_i32_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	s0, s0
-	ret
-.Lfunc_end11:
-	.size	test_fptoui_f32_i32_simd, .Lfunc_end11-test_fptoui_f32_i32_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fptosi_i32_f16_simd             // -- Begin function fptosi_i32_f16_simd
-	.p2align	2
-	.type	fptosi_i32_f16_simd, at function
-fptosi_i32_f16_simd:                    // @fptosi_i32_f16_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	s0, h0
-	ret
-.Lfunc_end12:
-	.size	fptosi_i32_f16_simd, .Lfunc_end12-fptosi_i32_f16_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fptosi_i64_f16_simd             // -- Begin function fptosi_i64_f16_simd
-	.p2align	2
-	.type	fptosi_i64_f16_simd, at function
-fptosi_i64_f16_simd:                    // @fptosi_i64_f16_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, h0
-	ret
-.Lfunc_end13:
-	.size	fptosi_i64_f16_simd, .Lfunc_end13-fptosi_i64_f16_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fptosi_i64_f32_simd             // -- Begin function fptosi_i64_f32_simd
-	.p2align	2
-	.type	fptosi_i64_f32_simd, at function
-fptosi_i64_f32_simd:                    // @fptosi_i64_f32_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, s0
-	ret
-.Lfunc_end14:
-	.size	fptosi_i64_f32_simd, .Lfunc_end14-fptosi_i64_f32_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fptosi_i32_f64_simd             // -- Begin function fptosi_i32_f64_simd
-	.p2align	2
-	.type	fptosi_i32_f64_simd, at function
-fptosi_i32_f64_simd:                    // @fptosi_i32_f64_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	s0, d0
-	ret
-.Lfunc_end15:
-	.size	fptosi_i32_f64_simd, .Lfunc_end15-fptosi_i32_f64_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fptosi_i64_f64_simd             // -- Begin function fptosi_i64_f64_simd
-	.p2align	2
-	.type	fptosi_i64_f64_simd, at function
-fptosi_i64_f64_simd:                    // @fptosi_i64_f64_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, d0
-	ret
-.Lfunc_end16:
-	.size	fptosi_i64_f64_simd, .Lfunc_end16-fptosi_i64_f64_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fptosi_i32_f32_simd             // -- Begin function fptosi_i32_f32_simd
-	.p2align	2
-	.type	fptosi_i32_f32_simd, at function
-fptosi_i32_f32_simd:                    // @fptosi_i32_f32_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	s0, s0
-	ret
-.Lfunc_end17:
-	.size	fptosi_i32_f32_simd, .Lfunc_end17-fptosi_i32_f32_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fptoui_i32_f16_simd             // -- Begin function fptoui_i32_f16_simd
-	.p2align	2
-	.type	fptoui_i32_f16_simd, at function
-fptoui_i32_f16_simd:                    // @fptoui_i32_f16_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	s0, h0
-	ret
-.Lfunc_end18:
-	.size	fptoui_i32_f16_simd, .Lfunc_end18-fptoui_i32_f16_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fptoui_i64_f16_simd             // -- Begin function fptoui_i64_f16_simd
-	.p2align	2
-	.type	fptoui_i64_f16_simd, at function
-fptoui_i64_f16_simd:                    // @fptoui_i64_f16_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	d0, h0
-	ret
-.Lfunc_end19:
-	.size	fptoui_i64_f16_simd, .Lfunc_end19-fptoui_i64_f16_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fptoui_i64_f32_simd             // -- Begin function fptoui_i64_f32_simd
-	.p2align	2
-	.type	fptoui_i64_f32_simd, at function
-fptoui_i64_f32_simd:                    // @fptoui_i64_f32_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	d0, s0
-	ret
-.Lfunc_end20:
-	.size	fptoui_i64_f32_simd, .Lfunc_end20-fptoui_i64_f32_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fptoui_i32_f64_simd             // -- Begin function fptoui_i32_f64_simd
-	.p2align	2
-	.type	fptoui_i32_f64_simd, at function
-fptoui_i32_f64_simd:                    // @fptoui_i32_f64_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	s0, d0
-	ret
-.Lfunc_end21:
-	.size	fptoui_i32_f64_simd, .Lfunc_end21-fptoui_i32_f64_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fptoui_i64_f64_simd             // -- Begin function fptoui_i64_f64_simd
-	.p2align	2
-	.type	fptoui_i64_f64_simd, at function
-fptoui_i64_f64_simd:                    // @fptoui_i64_f64_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	d0, d0
-	ret
-.Lfunc_end22:
-	.size	fptoui_i64_f64_simd, .Lfunc_end22-fptoui_i64_f64_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fptoui_i32_f32_simd             // -- Begin function fptoui_i32_f32_simd
-	.p2align	2
-	.type	fptoui_i32_f32_simd, at function
-fptoui_i32_f32_simd:                    // @fptoui_i32_f32_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	s0, s0
-	ret
-.Lfunc_end23:
-	.size	fptoui_i32_f32_simd, .Lfunc_end23-fptoui_i32_f32_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtas_ds_round_simd            // -- Begin function fcvtas_ds_round_simd
-	.p2align	2
-	.type	fcvtas_ds_round_simd, at function
-fcvtas_ds_round_simd:                   // @fcvtas_ds_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtas	d0, s0
-	ret
-.Lfunc_end24:
-	.size	fcvtas_ds_round_simd, .Lfunc_end24-fcvtas_ds_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtas_sd_round_simd            // -- Begin function fcvtas_sd_round_simd
-	.p2align	2
-	.type	fcvtas_sd_round_simd, at function
-fcvtas_sd_round_simd:                   // @fcvtas_sd_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtas	s0, d0
-	ret
-.Lfunc_end25:
-	.size	fcvtas_sd_round_simd, .Lfunc_end25-fcvtas_sd_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtas_ss_round_simd            // -- Begin function fcvtas_ss_round_simd
-	.p2align	2
-	.type	fcvtas_ss_round_simd, at function
-fcvtas_ss_round_simd:                   // @fcvtas_ss_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtas	s0, s0
-	ret
-.Lfunc_end26:
-	.size	fcvtas_ss_round_simd, .Lfunc_end26-fcvtas_ss_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtas_dd_round_simd            // -- Begin function fcvtas_dd_round_simd
-	.p2align	2
-	.type	fcvtas_dd_round_simd, at function
-fcvtas_dd_round_simd:                   // @fcvtas_dd_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtas	d0, d0
-	ret
-.Lfunc_end27:
-	.size	fcvtas_dd_round_simd, .Lfunc_end27-fcvtas_dd_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtau_ds_round_simd            // -- Begin function fcvtau_ds_round_simd
-	.p2align	2
-	.type	fcvtau_ds_round_simd, at function
-fcvtau_ds_round_simd:                   // @fcvtau_ds_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtau	d0, s0
-	ret
-.Lfunc_end28:
-	.size	fcvtau_ds_round_simd, .Lfunc_end28-fcvtau_ds_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtau_sd_round_simd            // -- Begin function fcvtau_sd_round_simd
-	.p2align	2
-	.type	fcvtau_sd_round_simd, at function
-fcvtau_sd_round_simd:                   // @fcvtau_sd_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtau	s0, d0
-	ret
-.Lfunc_end29:
-	.size	fcvtau_sd_round_simd, .Lfunc_end29-fcvtau_sd_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtau_ss_round_simd            // -- Begin function fcvtau_ss_round_simd
-	.p2align	2
-	.type	fcvtau_ss_round_simd, at function
-fcvtau_ss_round_simd:                   // @fcvtau_ss_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtas	s0, s0
-	ret
-.Lfunc_end30:
-	.size	fcvtau_ss_round_simd, .Lfunc_end30-fcvtau_ss_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtau_dd_round_simd            // -- Begin function fcvtau_dd_round_simd
-	.p2align	2
-	.type	fcvtau_dd_round_simd, at function
-fcvtau_dd_round_simd:                   // @fcvtau_dd_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtas	d0, d0
-	ret
-.Lfunc_end31:
-	.size	fcvtau_dd_round_simd, .Lfunc_end31-fcvtau_dd_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtms_ds_round_simd            // -- Begin function fcvtms_ds_round_simd
-	.p2align	2
-	.type	fcvtms_ds_round_simd, at function
-fcvtms_ds_round_simd:                   // @fcvtms_ds_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtms	d0, s0
-	ret
-.Lfunc_end32:
-	.size	fcvtms_ds_round_simd, .Lfunc_end32-fcvtms_ds_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtms_sd_round_simd            // -- Begin function fcvtms_sd_round_simd
-	.p2align	2
-	.type	fcvtms_sd_round_simd, at function
-fcvtms_sd_round_simd:                   // @fcvtms_sd_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtms	s0, d0
-	ret
-.Lfunc_end33:
-	.size	fcvtms_sd_round_simd, .Lfunc_end33-fcvtms_sd_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtms_ss_round_simd            // -- Begin function fcvtms_ss_round_simd
-	.p2align	2
-	.type	fcvtms_ss_round_simd, at function
-fcvtms_ss_round_simd:                   // @fcvtms_ss_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtms	s0, s0
-	ret
-.Lfunc_end34:
-	.size	fcvtms_ss_round_simd, .Lfunc_end34-fcvtms_ss_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtms_dd_round_simd            // -- Begin function fcvtms_dd_round_simd
-	.p2align	2
-	.type	fcvtms_dd_round_simd, at function
-fcvtms_dd_round_simd:                   // @fcvtms_dd_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtms	d0, d0
-	ret
-.Lfunc_end35:
-	.size	fcvtms_dd_round_simd, .Lfunc_end35-fcvtms_dd_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtmu_ds_round_simd            // -- Begin function fcvtmu_ds_round_simd
-	.p2align	2
-	.type	fcvtmu_ds_round_simd, at function
-fcvtmu_ds_round_simd:                   // @fcvtmu_ds_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtmu	d0, s0
-	ret
-.Lfunc_end36:
-	.size	fcvtmu_ds_round_simd, .Lfunc_end36-fcvtmu_ds_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtmu_sd_round_simd            // -- Begin function fcvtmu_sd_round_simd
-	.p2align	2
-	.type	fcvtmu_sd_round_simd, at function
-fcvtmu_sd_round_simd:                   // @fcvtmu_sd_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtmu	s0, d0
-	ret
-.Lfunc_end37:
-	.size	fcvtmu_sd_round_simd, .Lfunc_end37-fcvtmu_sd_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtmu_ss_round_simd            // -- Begin function fcvtmu_ss_round_simd
-	.p2align	2
-	.type	fcvtmu_ss_round_simd, at function
-fcvtmu_ss_round_simd:                   // @fcvtmu_ss_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtms	s0, s0
-	ret
-.Lfunc_end38:
-	.size	fcvtmu_ss_round_simd, .Lfunc_end38-fcvtmu_ss_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtmu_dd_round_simd            // -- Begin function fcvtmu_dd_round_simd
-	.p2align	2
-	.type	fcvtmu_dd_round_simd, at function
-fcvtmu_dd_round_simd:                   // @fcvtmu_dd_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtms	d0, d0
-	ret
-.Lfunc_end39:
-	.size	fcvtmu_dd_round_simd, .Lfunc_end39-fcvtmu_dd_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtps_ds_round_simd            // -- Begin function fcvtps_ds_round_simd
-	.p2align	2
-	.type	fcvtps_ds_round_simd, at function
-fcvtps_ds_round_simd:                   // @fcvtps_ds_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtps	d0, s0
-	ret
-.Lfunc_end40:
-	.size	fcvtps_ds_round_simd, .Lfunc_end40-fcvtps_ds_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtps_sd_round_simd            // -- Begin function fcvtps_sd_round_simd
-	.p2align	2
-	.type	fcvtps_sd_round_simd, at function
-fcvtps_sd_round_simd:                   // @fcvtps_sd_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtps	s0, d0
-	ret
-.Lfunc_end41:
-	.size	fcvtps_sd_round_simd, .Lfunc_end41-fcvtps_sd_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtps_ss_round_simd            // -- Begin function fcvtps_ss_round_simd
-	.p2align	2
-	.type	fcvtps_ss_round_simd, at function
-fcvtps_ss_round_simd:                   // @fcvtps_ss_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtps	s0, s0
-	ret
-.Lfunc_end42:
-	.size	fcvtps_ss_round_simd, .Lfunc_end42-fcvtps_ss_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtps_dd_round_simd            // -- Begin function fcvtps_dd_round_simd
-	.p2align	2
-	.type	fcvtps_dd_round_simd, at function
-fcvtps_dd_round_simd:                   // @fcvtps_dd_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtps	d0, d0
-	ret
-.Lfunc_end43:
-	.size	fcvtps_dd_round_simd, .Lfunc_end43-fcvtps_dd_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtpu_ds_round_simd            // -- Begin function fcvtpu_ds_round_simd
-	.p2align	2
-	.type	fcvtpu_ds_round_simd, at function
-fcvtpu_ds_round_simd:                   // @fcvtpu_ds_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtpu	d0, s0
-	ret
-.Lfunc_end44:
-	.size	fcvtpu_ds_round_simd, .Lfunc_end44-fcvtpu_ds_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtpu_sd_round_simd            // -- Begin function fcvtpu_sd_round_simd
-	.p2align	2
-	.type	fcvtpu_sd_round_simd, at function
-fcvtpu_sd_round_simd:                   // @fcvtpu_sd_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtpu	s0, d0
-	ret
-.Lfunc_end45:
-	.size	fcvtpu_sd_round_simd, .Lfunc_end45-fcvtpu_sd_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtpu_ss_round_simd            // -- Begin function fcvtpu_ss_round_simd
-	.p2align	2
-	.type	fcvtpu_ss_round_simd, at function
-fcvtpu_ss_round_simd:                   // @fcvtpu_ss_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtps	s0, s0
-	ret
-.Lfunc_end46:
-	.size	fcvtpu_ss_round_simd, .Lfunc_end46-fcvtpu_ss_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtpu_dd_round_simd            // -- Begin function fcvtpu_dd_round_simd
-	.p2align	2
-	.type	fcvtpu_dd_round_simd, at function
-fcvtpu_dd_round_simd:                   // @fcvtpu_dd_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtps	d0, d0
-	ret
-.Lfunc_end47:
-	.size	fcvtpu_dd_round_simd, .Lfunc_end47-fcvtpu_dd_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_ds_round_simd            // -- Begin function fcvtzs_ds_round_simd
-	.p2align	2
-	.type	fcvtzs_ds_round_simd, at function
-fcvtzs_ds_round_simd:                   // @fcvtzs_ds_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, s0
-	ret
-.Lfunc_end48:
-	.size	fcvtzs_ds_round_simd, .Lfunc_end48-fcvtzs_ds_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_sd_round_simd            // -- Begin function fcvtzs_sd_round_simd
-	.p2align	2
-	.type	fcvtzs_sd_round_simd, at function
-fcvtzs_sd_round_simd:                   // @fcvtzs_sd_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	s0, d0
-	ret
-.Lfunc_end49:
-	.size	fcvtzs_sd_round_simd, .Lfunc_end49-fcvtzs_sd_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_ss_round_simd            // -- Begin function fcvtzs_ss_round_simd
-	.p2align	2
-	.type	fcvtzs_ss_round_simd, at function
-fcvtzs_ss_round_simd:                   // @fcvtzs_ss_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	s0, s0
-	ret
-.Lfunc_end50:
-	.size	fcvtzs_ss_round_simd, .Lfunc_end50-fcvtzs_ss_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_dd_round_simd            // -- Begin function fcvtzs_dd_round_simd
-	.p2align	2
-	.type	fcvtzs_dd_round_simd, at function
-fcvtzs_dd_round_simd:                   // @fcvtzs_dd_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, d0
-	ret
-.Lfunc_end51:
-	.size	fcvtzs_dd_round_simd, .Lfunc_end51-fcvtzs_dd_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_ds_round_simd            // -- Begin function fcvtzu_ds_round_simd
-	.p2align	2
-	.type	fcvtzu_ds_round_simd, at function
-fcvtzu_ds_round_simd:                   // @fcvtzu_ds_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	d0, s0
-	ret
-.Lfunc_end52:
-	.size	fcvtzu_ds_round_simd, .Lfunc_end52-fcvtzu_ds_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_sd_round_simd            // -- Begin function fcvtzu_sd_round_simd
-	.p2align	2
-	.type	fcvtzu_sd_round_simd, at function
-fcvtzu_sd_round_simd:                   // @fcvtzu_sd_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	s0, d0
-	ret
-.Lfunc_end53:
-	.size	fcvtzu_sd_round_simd, .Lfunc_end53-fcvtzu_sd_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_ss_round_simd            // -- Begin function fcvtzu_ss_round_simd
-	.p2align	2
-	.type	fcvtzu_ss_round_simd, at function
-fcvtzu_ss_round_simd:                   // @fcvtzu_ss_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	s0, s0
-	ret
-.Lfunc_end54:
-	.size	fcvtzu_ss_round_simd, .Lfunc_end54-fcvtzu_ss_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_dd_round_simd            // -- Begin function fcvtzu_dd_round_simd
-	.p2align	2
-	.type	fcvtzu_dd_round_simd, at function
-fcvtzu_dd_round_simd:                   // @fcvtzu_dd_round_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, d0
-	ret
-.Lfunc_end55:
-	.size	fcvtzu_dd_round_simd, .Lfunc_end55-fcvtzu_dd_round_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_sh_sat_simd              // -- Begin function fcvtzs_sh_sat_simd
-	.p2align	2
-	.type	fcvtzs_sh_sat_simd, at function
-fcvtzs_sh_sat_simd:                     // @fcvtzs_sh_sat_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	s0, h0
-	ret
-.Lfunc_end56:
-	.size	fcvtzs_sh_sat_simd, .Lfunc_end56-fcvtzs_sh_sat_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_dh_sat_simd              // -- Begin function fcvtzs_dh_sat_simd
-	.p2align	2
-	.type	fcvtzs_dh_sat_simd, at function
-fcvtzs_dh_sat_simd:                     // @fcvtzs_dh_sat_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, h0
-	ret
-.Lfunc_end57:
-	.size	fcvtzs_dh_sat_simd, .Lfunc_end57-fcvtzs_dh_sat_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_ds_sat_simd              // -- Begin function fcvtzs_ds_sat_simd
-	.p2align	2
-	.type	fcvtzs_ds_sat_simd, at function
-fcvtzs_ds_sat_simd:                     // @fcvtzs_ds_sat_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, s0
-	ret
-.Lfunc_end58:
-	.size	fcvtzs_ds_sat_simd, .Lfunc_end58-fcvtzs_ds_sat_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_sd_sat_simd              // -- Begin function fcvtzs_sd_sat_simd
-	.p2align	2
-	.type	fcvtzs_sd_sat_simd, at function
-fcvtzs_sd_sat_simd:                     // @fcvtzs_sd_sat_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	s0, d0
-	ret
-.Lfunc_end59:
-	.size	fcvtzs_sd_sat_simd, .Lfunc_end59-fcvtzs_sd_sat_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_ss_sat_simd              // -- Begin function fcvtzs_ss_sat_simd
-	.p2align	2
-	.type	fcvtzs_ss_sat_simd, at function
-fcvtzs_ss_sat_simd:                     // @fcvtzs_ss_sat_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	s0, s0
-	ret
-.Lfunc_end60:
-	.size	fcvtzs_ss_sat_simd, .Lfunc_end60-fcvtzs_ss_sat_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_dd_sat_simd              // -- Begin function fcvtzs_dd_sat_simd
-	.p2align	2
-	.type	fcvtzs_dd_sat_simd, at function
-fcvtzs_dd_sat_simd:                     // @fcvtzs_dd_sat_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, d0
-	ret
-.Lfunc_end61:
-	.size	fcvtzs_dd_sat_simd, .Lfunc_end61-fcvtzs_dd_sat_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_sh_sat_simd              // -- Begin function fcvtzu_sh_sat_simd
-	.p2align	2
-	.type	fcvtzu_sh_sat_simd, at function
-fcvtzu_sh_sat_simd:                     // @fcvtzu_sh_sat_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	s0, h0
-	ret
-.Lfunc_end62:
-	.size	fcvtzu_sh_sat_simd, .Lfunc_end62-fcvtzu_sh_sat_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_dh_sat_simd              // -- Begin function fcvtzu_dh_sat_simd
-	.p2align	2
-	.type	fcvtzu_dh_sat_simd, at function
-fcvtzu_dh_sat_simd:                     // @fcvtzu_dh_sat_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	d0, h0
-	ret
-.Lfunc_end63:
-	.size	fcvtzu_dh_sat_simd, .Lfunc_end63-fcvtzu_dh_sat_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_ds_sat_simd              // -- Begin function fcvtzu_ds_sat_simd
-	.p2align	2
-	.type	fcvtzu_ds_sat_simd, at function
-fcvtzu_ds_sat_simd:                     // @fcvtzu_ds_sat_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	d0, s0
-	ret
-.Lfunc_end64:
-	.size	fcvtzu_ds_sat_simd, .Lfunc_end64-fcvtzu_ds_sat_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_sd_sat_simd              // -- Begin function fcvtzu_sd_sat_simd
-	.p2align	2
-	.type	fcvtzu_sd_sat_simd, at function
-fcvtzu_sd_sat_simd:                     // @fcvtzu_sd_sat_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	s0, d0
-	ret
-.Lfunc_end65:
-	.size	fcvtzu_sd_sat_simd, .Lfunc_end65-fcvtzu_sd_sat_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_ss_sat_simd              // -- Begin function fcvtzu_ss_sat_simd
-	.p2align	2
-	.type	fcvtzu_ss_sat_simd, at function
-fcvtzu_ss_sat_simd:                     // @fcvtzu_ss_sat_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	s0, s0
-	ret
-.Lfunc_end66:
-	.size	fcvtzu_ss_sat_simd, .Lfunc_end66-fcvtzu_ss_sat_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_dd_sat_simd              // -- Begin function fcvtzu_dd_sat_simd
-	.p2align	2
-	.type	fcvtzu_dd_sat_simd, at function
-fcvtzu_dd_sat_simd:                     // @fcvtzu_dd_sat_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, d0
-	ret
-.Lfunc_end67:
-	.size	fcvtzu_dd_sat_simd, .Lfunc_end67-fcvtzu_dd_sat_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtas_sh_simd                  // -- Begin function fcvtas_sh_simd
-	.p2align	2
-	.type	fcvtas_sh_simd, at function
-fcvtas_sh_simd:                         // @fcvtas_sh_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtas	s0, h0
-	ret
-.Lfunc_end68:
-	.size	fcvtas_sh_simd, .Lfunc_end68-fcvtas_sh_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtas_dh_simd                  // -- Begin function fcvtas_dh_simd
-	.p2align	2
-	.type	fcvtas_dh_simd, at function
-fcvtas_dh_simd:                         // @fcvtas_dh_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtas	d0, h0
-	ret
-.Lfunc_end69:
-	.size	fcvtas_dh_simd, .Lfunc_end69-fcvtas_dh_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtas_ds_simd                  // -- Begin function fcvtas_ds_simd
-	.p2align	2
-	.type	fcvtas_ds_simd, at function
-fcvtas_ds_simd:                         // @fcvtas_ds_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtas	d0, s0
-	ret
-.Lfunc_end70:
-	.size	fcvtas_ds_simd, .Lfunc_end70-fcvtas_ds_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtas_sd_simd                  // -- Begin function fcvtas_sd_simd
-	.p2align	2
-	.type	fcvtas_sd_simd, at function
-fcvtas_sd_simd:                         // @fcvtas_sd_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtas	s0, d0
-	ret
-.Lfunc_end71:
-	.size	fcvtas_sd_simd, .Lfunc_end71-fcvtas_sd_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtas_ss_simd                  // -- Begin function fcvtas_ss_simd
-	.p2align	2
-	.type	fcvtas_ss_simd, at function
-fcvtas_ss_simd:                         // @fcvtas_ss_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtas	s0, s0
-	ret
-.Lfunc_end72:
-	.size	fcvtas_ss_simd, .Lfunc_end72-fcvtas_ss_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtas_dd_simd                  // -- Begin function fcvtas_dd_simd
-	.p2align	2
-	.type	fcvtas_dd_simd, at function
-fcvtas_dd_simd:                         // @fcvtas_dd_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtas	d0, d0
-	ret
-.Lfunc_end73:
-	.size	fcvtas_dd_simd, .Lfunc_end73-fcvtas_dd_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtau_sh_simd                  // -- Begin function fcvtau_sh_simd
-	.p2align	2
-	.type	fcvtau_sh_simd, at function
-fcvtau_sh_simd:                         // @fcvtau_sh_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtau	s0, h0
-	ret
-.Lfunc_end74:
-	.size	fcvtau_sh_simd, .Lfunc_end74-fcvtau_sh_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtau_dh_simd                  // -- Begin function fcvtau_dh_simd
-	.p2align	2
-	.type	fcvtau_dh_simd, at function
-fcvtau_dh_simd:                         // @fcvtau_dh_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtau	d0, h0
-	ret
-.Lfunc_end75:
-	.size	fcvtau_dh_simd, .Lfunc_end75-fcvtau_dh_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtau_ds_simd                  // -- Begin function fcvtau_ds_simd
-	.p2align	2
-	.type	fcvtau_ds_simd, at function
-fcvtau_ds_simd:                         // @fcvtau_ds_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtau	d0, s0
-	ret
-.Lfunc_end76:
-	.size	fcvtau_ds_simd, .Lfunc_end76-fcvtau_ds_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtau_sd_simd                  // -- Begin function fcvtau_sd_simd
-	.p2align	2
-	.type	fcvtau_sd_simd, at function
-fcvtau_sd_simd:                         // @fcvtau_sd_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtau	s0, d0
-	ret
-.Lfunc_end77:
-	.size	fcvtau_sd_simd, .Lfunc_end77-fcvtau_sd_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtau_ss_simd                  // -- Begin function fcvtau_ss_simd
-	.p2align	2
-	.type	fcvtau_ss_simd, at function
-fcvtau_ss_simd:                         // @fcvtau_ss_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtas	s0, s0
-	ret
-.Lfunc_end78:
-	.size	fcvtau_ss_simd, .Lfunc_end78-fcvtau_ss_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtau_dd_simd                  // -- Begin function fcvtau_dd_simd
-	.p2align	2
-	.type	fcvtau_dd_simd, at function
-fcvtau_dd_simd:                         // @fcvtau_dd_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtas	d0, d0
-	ret
-.Lfunc_end79:
-	.size	fcvtau_dd_simd, .Lfunc_end79-fcvtau_dd_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtms_sh_simd                  // -- Begin function fcvtms_sh_simd
-	.p2align	2
-	.type	fcvtms_sh_simd, at function
-fcvtms_sh_simd:                         // @fcvtms_sh_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtms	s0, h0
-	ret
-.Lfunc_end80:
-	.size	fcvtms_sh_simd, .Lfunc_end80-fcvtms_sh_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtms_dh_simd                  // -- Begin function fcvtms_dh_simd
-	.p2align	2
-	.type	fcvtms_dh_simd, at function
-fcvtms_dh_simd:                         // @fcvtms_dh_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtms	d0, h0
-	ret
-.Lfunc_end81:
-	.size	fcvtms_dh_simd, .Lfunc_end81-fcvtms_dh_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtms_ds_simd                  // -- Begin function fcvtms_ds_simd
-	.p2align	2
-	.type	fcvtms_ds_simd, at function
-fcvtms_ds_simd:                         // @fcvtms_ds_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtms	d0, s0
-	ret
-.Lfunc_end82:
-	.size	fcvtms_ds_simd, .Lfunc_end82-fcvtms_ds_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtms_sd_simd                  // -- Begin function fcvtms_sd_simd
-	.p2align	2
-	.type	fcvtms_sd_simd, at function
-fcvtms_sd_simd:                         // @fcvtms_sd_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtms	s0, d0
-	ret
-.Lfunc_end83:
-	.size	fcvtms_sd_simd, .Lfunc_end83-fcvtms_sd_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtms_ss_simd                  // -- Begin function fcvtms_ss_simd
-	.p2align	2
-	.type	fcvtms_ss_simd, at function
-fcvtms_ss_simd:                         // @fcvtms_ss_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtms	s0, s0
-	ret
-.Lfunc_end84:
-	.size	fcvtms_ss_simd, .Lfunc_end84-fcvtms_ss_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtms_dd_simd                  // -- Begin function fcvtms_dd_simd
-	.p2align	2
-	.type	fcvtms_dd_simd, at function
-fcvtms_dd_simd:                         // @fcvtms_dd_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtms	d0, d0
-	ret
-.Lfunc_end85:
-	.size	fcvtms_dd_simd, .Lfunc_end85-fcvtms_dd_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtmu_sh_simd                  // -- Begin function fcvtmu_sh_simd
-	.p2align	2
-	.type	fcvtmu_sh_simd, at function
-fcvtmu_sh_simd:                         // @fcvtmu_sh_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtmu	s0, h0
-	ret
-.Lfunc_end86:
-	.size	fcvtmu_sh_simd, .Lfunc_end86-fcvtmu_sh_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtmu_dh_simd                  // -- Begin function fcvtmu_dh_simd
-	.p2align	2
-	.type	fcvtmu_dh_simd, at function
-fcvtmu_dh_simd:                         // @fcvtmu_dh_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtmu	d0, h0
-	ret
-.Lfunc_end87:
-	.size	fcvtmu_dh_simd, .Lfunc_end87-fcvtmu_dh_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtmu_ds_simd                  // -- Begin function fcvtmu_ds_simd
-	.p2align	2
-	.type	fcvtmu_ds_simd, at function
-fcvtmu_ds_simd:                         // @fcvtmu_ds_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtmu	d0, s0
-	ret
-.Lfunc_end88:
-	.size	fcvtmu_ds_simd, .Lfunc_end88-fcvtmu_ds_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtmu_sd_simd                  // -- Begin function fcvtmu_sd_simd
-	.p2align	2
-	.type	fcvtmu_sd_simd, at function
-fcvtmu_sd_simd:                         // @fcvtmu_sd_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtmu	s0, d0
-	ret
-.Lfunc_end89:
-	.size	fcvtmu_sd_simd, .Lfunc_end89-fcvtmu_sd_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtmu_ss_simd                  // -- Begin function fcvtmu_ss_simd
-	.p2align	2
-	.type	fcvtmu_ss_simd, at function
-fcvtmu_ss_simd:                         // @fcvtmu_ss_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtms	s0, s0
-	ret
-.Lfunc_end90:
-	.size	fcvtmu_ss_simd, .Lfunc_end90-fcvtmu_ss_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtmu_dd_simd                  // -- Begin function fcvtmu_dd_simd
-	.p2align	2
-	.type	fcvtmu_dd_simd, at function
-fcvtmu_dd_simd:                         // @fcvtmu_dd_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtms	d0, d0
-	ret
-.Lfunc_end91:
-	.size	fcvtmu_dd_simd, .Lfunc_end91-fcvtmu_dd_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtps_sh_simd                  // -- Begin function fcvtps_sh_simd
-	.p2align	2
-	.type	fcvtps_sh_simd, at function
-fcvtps_sh_simd:                         // @fcvtps_sh_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtps	s0, h0
-	ret
-.Lfunc_end92:
-	.size	fcvtps_sh_simd, .Lfunc_end92-fcvtps_sh_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtps_dh_simd                  // -- Begin function fcvtps_dh_simd
-	.p2align	2
-	.type	fcvtps_dh_simd, at function
-fcvtps_dh_simd:                         // @fcvtps_dh_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtps	d0, h0
-	ret
-.Lfunc_end93:
-	.size	fcvtps_dh_simd, .Lfunc_end93-fcvtps_dh_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtps_ds_simd                  // -- Begin function fcvtps_ds_simd
-	.p2align	2
-	.type	fcvtps_ds_simd, at function
-fcvtps_ds_simd:                         // @fcvtps_ds_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtps	d0, s0
-	ret
-.Lfunc_end94:
-	.size	fcvtps_ds_simd, .Lfunc_end94-fcvtps_ds_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtps_sd_simd                  // -- Begin function fcvtps_sd_simd
-	.p2align	2
-	.type	fcvtps_sd_simd, at function
-fcvtps_sd_simd:                         // @fcvtps_sd_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtps	s0, d0
-	ret
-.Lfunc_end95:
-	.size	fcvtps_sd_simd, .Lfunc_end95-fcvtps_sd_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtps_ss_simd                  // -- Begin function fcvtps_ss_simd
-	.p2align	2
-	.type	fcvtps_ss_simd, at function
-fcvtps_ss_simd:                         // @fcvtps_ss_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtps	s0, s0
-	ret
-.Lfunc_end96:
-	.size	fcvtps_ss_simd, .Lfunc_end96-fcvtps_ss_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtps_dd_simd                  // -- Begin function fcvtps_dd_simd
-	.p2align	2
-	.type	fcvtps_dd_simd, at function
-fcvtps_dd_simd:                         // @fcvtps_dd_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtps	d0, d0
-	ret
-.Lfunc_end97:
-	.size	fcvtps_dd_simd, .Lfunc_end97-fcvtps_dd_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtpu_sh_simd                  // -- Begin function fcvtpu_sh_simd
-	.p2align	2
-	.type	fcvtpu_sh_simd, at function
-fcvtpu_sh_simd:                         // @fcvtpu_sh_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtpu	s0, h0
-	ret
-.Lfunc_end98:
-	.size	fcvtpu_sh_simd, .Lfunc_end98-fcvtpu_sh_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtpu_dh_simd                  // -- Begin function fcvtpu_dh_simd
-	.p2align	2
-	.type	fcvtpu_dh_simd, at function
-fcvtpu_dh_simd:                         // @fcvtpu_dh_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtpu	d0, h0
-	ret
-.Lfunc_end99:
-	.size	fcvtpu_dh_simd, .Lfunc_end99-fcvtpu_dh_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtpu_ds_simd                  // -- Begin function fcvtpu_ds_simd
-	.p2align	2
-	.type	fcvtpu_ds_simd, at function
-fcvtpu_ds_simd:                         // @fcvtpu_ds_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtpu	d0, s0
-	ret
-.Lfunc_end100:
-	.size	fcvtpu_ds_simd, .Lfunc_end100-fcvtpu_ds_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtpu_sd_simd                  // -- Begin function fcvtpu_sd_simd
-	.p2align	2
-	.type	fcvtpu_sd_simd, at function
-fcvtpu_sd_simd:                         // @fcvtpu_sd_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtpu	s0, d0
-	ret
-.Lfunc_end101:
-	.size	fcvtpu_sd_simd, .Lfunc_end101-fcvtpu_sd_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtpu_ss_simd                  // -- Begin function fcvtpu_ss_simd
-	.p2align	2
-	.type	fcvtpu_ss_simd, at function
-fcvtpu_ss_simd:                         // @fcvtpu_ss_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtps	s0, s0
-	ret
-.Lfunc_end102:
-	.size	fcvtpu_ss_simd, .Lfunc_end102-fcvtpu_ss_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtpu_dd_simd                  // -- Begin function fcvtpu_dd_simd
-	.p2align	2
-	.type	fcvtpu_dd_simd, at function
-fcvtpu_dd_simd:                         // @fcvtpu_dd_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtps	d0, d0
-	ret
-.Lfunc_end103:
-	.size	fcvtpu_dd_simd, .Lfunc_end103-fcvtpu_dd_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_sh_simd                  // -- Begin function fcvtzs_sh_simd
-	.p2align	2
-	.type	fcvtzs_sh_simd, at function
-fcvtzs_sh_simd:                         // @fcvtzs_sh_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	s0, h0
-	ret
-.Lfunc_end104:
-	.size	fcvtzs_sh_simd, .Lfunc_end104-fcvtzs_sh_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_dh_simd                  // -- Begin function fcvtzs_dh_simd
-	.p2align	2
-	.type	fcvtzs_dh_simd, at function
-fcvtzs_dh_simd:                         // @fcvtzs_dh_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, h0
-	ret
-.Lfunc_end105:
-	.size	fcvtzs_dh_simd, .Lfunc_end105-fcvtzs_dh_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_ds_simd                  // -- Begin function fcvtzs_ds_simd
-	.p2align	2
-	.type	fcvtzs_ds_simd, at function
-fcvtzs_ds_simd:                         // @fcvtzs_ds_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, s0
-	ret
-.Lfunc_end106:
-	.size	fcvtzs_ds_simd, .Lfunc_end106-fcvtzs_ds_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_sd_simd                  // -- Begin function fcvtzs_sd_simd
-	.p2align	2
-	.type	fcvtzs_sd_simd, at function
-fcvtzs_sd_simd:                         // @fcvtzs_sd_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	s0, d0
-	ret
-.Lfunc_end107:
-	.size	fcvtzs_sd_simd, .Lfunc_end107-fcvtzs_sd_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_ss_simd                  // -- Begin function fcvtzs_ss_simd
-	.p2align	2
-	.type	fcvtzs_ss_simd, at function
-fcvtzs_ss_simd:                         // @fcvtzs_ss_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	s0, s0
-	ret
-.Lfunc_end108:
-	.size	fcvtzs_ss_simd, .Lfunc_end108-fcvtzs_ss_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_dd_simd                  // -- Begin function fcvtzs_dd_simd
-	.p2align	2
-	.type	fcvtzs_dd_simd, at function
-fcvtzs_dd_simd:                         // @fcvtzs_dd_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, d0
-	ret
-.Lfunc_end109:
-	.size	fcvtzs_dd_simd, .Lfunc_end109-fcvtzs_dd_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_sh_simd                  // -- Begin function fcvtzu_sh_simd
-	.p2align	2
-	.type	fcvtzu_sh_simd, at function
-fcvtzu_sh_simd:                         // @fcvtzu_sh_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	s0, h0
-	ret
-.Lfunc_end110:
-	.size	fcvtzu_sh_simd, .Lfunc_end110-fcvtzu_sh_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_dh_simd                  // -- Begin function fcvtzu_dh_simd
-	.p2align	2
-	.type	fcvtzu_dh_simd, at function
-fcvtzu_dh_simd:                         // @fcvtzu_dh_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	d0, h0
-	ret
-.Lfunc_end111:
-	.size	fcvtzu_dh_simd, .Lfunc_end111-fcvtzu_dh_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_ds_simd                  // -- Begin function fcvtzu_ds_simd
-	.p2align	2
-	.type	fcvtzu_ds_simd, at function
-fcvtzu_ds_simd:                         // @fcvtzu_ds_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	d0, s0
-	ret
-.Lfunc_end112:
-	.size	fcvtzu_ds_simd, .Lfunc_end112-fcvtzu_ds_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_sd_simd                  // -- Begin function fcvtzu_sd_simd
-	.p2align	2
-	.type	fcvtzu_sd_simd, at function
-fcvtzu_sd_simd:                         // @fcvtzu_sd_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	s0, d0
-	ret
-.Lfunc_end113:
-	.size	fcvtzu_sd_simd, .Lfunc_end113-fcvtzu_sd_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_ss_simd                  // -- Begin function fcvtzu_ss_simd
-	.p2align	2
-	.type	fcvtzu_ss_simd, at function
-fcvtzu_ss_simd:                         // @fcvtzu_ss_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	s0, s0
-	ret
-.Lfunc_end114:
-	.size	fcvtzu_ss_simd, .Lfunc_end114-fcvtzu_ss_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_dd_simd                  // -- Begin function fcvtzu_dd_simd
-	.p2align	2
-	.type	fcvtzu_dd_simd, at function
-fcvtzu_dd_simd:                         // @fcvtzu_dd_simd
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	d0, d0
-	ret
-.Lfunc_end115:
-	.size	fcvtzu_dd_simd, .Lfunc_end115-fcvtzu_dd_simd
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_scalar_to_vector_h       // -- Begin function fcvtzs_scalar_to_vector_h
-	.p2align	2
-	.type	fcvtzs_scalar_to_vector_h, at function
-fcvtzs_scalar_to_vector_h:              // @fcvtzs_scalar_to_vector_h
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, h0
-	ret
-.Lfunc_end116:
-	.size	fcvtzs_scalar_to_vector_h, .Lfunc_end116-fcvtzs_scalar_to_vector_h
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_scalar_to_vector_s       // -- Begin function fcvtzs_scalar_to_vector_s
-	.p2align	2
-	.type	fcvtzs_scalar_to_vector_s, at function
-fcvtzs_scalar_to_vector_s:              // @fcvtzs_scalar_to_vector_s
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, s0
-	ret
-.Lfunc_end117:
-	.size	fcvtzs_scalar_to_vector_s, .Lfunc_end117-fcvtzs_scalar_to_vector_s
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_scalar_to_vector_d       // -- Begin function fcvtzs_scalar_to_vector_d
-	.p2align	2
-	.type	fcvtzs_scalar_to_vector_d, at function
-fcvtzs_scalar_to_vector_d:              // @fcvtzs_scalar_to_vector_d
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, d0
-	ret
-.Lfunc_end118:
-	.size	fcvtzs_scalar_to_vector_d, .Lfunc_end118-fcvtzs_scalar_to_vector_d
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_scalar_to_vector_h       // -- Begin function fcvtzu_scalar_to_vector_h
-	.p2align	2
-	.type	fcvtzu_scalar_to_vector_h, at function
-fcvtzu_scalar_to_vector_h:              // @fcvtzu_scalar_to_vector_h
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	d0, h0
-	ret
-.Lfunc_end119:
-	.size	fcvtzu_scalar_to_vector_h, .Lfunc_end119-fcvtzu_scalar_to_vector_h
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_scalar_to_vector_s       // -- Begin function fcvtzu_scalar_to_vector_s
-	.p2align	2
-	.type	fcvtzu_scalar_to_vector_s, at function
-fcvtzu_scalar_to_vector_s:              // @fcvtzu_scalar_to_vector_s
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	d0, s0
-	ret
-.Lfunc_end120:
-	.size	fcvtzu_scalar_to_vector_s, .Lfunc_end120-fcvtzu_scalar_to_vector_s
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_scalar_to_vector_d       // -- Begin function fcvtzu_scalar_to_vector_d
-	.p2align	2
-	.type	fcvtzu_scalar_to_vector_d, at function
-fcvtzu_scalar_to_vector_d:              // @fcvtzu_scalar_to_vector_d
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	d0, d0
-	ret
-.Lfunc_end121:
-	.size	fcvtzu_scalar_to_vector_d, .Lfunc_end121-fcvtzu_scalar_to_vector_d
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_scalar_to_vector_h_strict // -- Begin function fcvtzs_scalar_to_vector_h_strict
-	.p2align	2
-	.type	fcvtzs_scalar_to_vector_h_strict, at function
-fcvtzs_scalar_to_vector_h_strict:       // @fcvtzs_scalar_to_vector_h_strict
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, h0
-	ret
-.Lfunc_end122:
-	.size	fcvtzs_scalar_to_vector_h_strict, .Lfunc_end122-fcvtzs_scalar_to_vector_h_strict
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzs_scalar_to_vector_s_strict // -- Begin function fcvtzs_scalar_to_vector_s_strict
-	.p2align	2
-	.type	fcvtzs_scalar_to_vector_s_strict, at function
-fcvtzs_scalar_to_vector_s_strict:       // @fcvtzs_scalar_to_vector_s_strict
-	.cfi_startproc
-// %bb.0:
-	fcvtzs	d0, s0
-	ret
-.Lfunc_end123:
-	.size	fcvtzs_scalar_to_vector_s_strict, .Lfunc_end123-fcvtzs_scalar_to_vector_s_strict
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_scalar_to_vector_h_strict // -- Begin function fcvtzu_scalar_to_vector_h_strict
-	.p2align	2
-	.type	fcvtzu_scalar_to_vector_h_strict, at function
-fcvtzu_scalar_to_vector_h_strict:       // @fcvtzu_scalar_to_vector_h_strict
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	d0, h0
-	ret
-.Lfunc_end124:
-	.size	fcvtzu_scalar_to_vector_h_strict, .Lfunc_end124-fcvtzu_scalar_to_vector_h_strict
-	.cfi_endproc
-                                        // -- End function
-	.globl	fcvtzu_scalar_to_vector_s_strict // -- Begin function fcvtzu_scalar_to_vector_s_strict
-	.p2align	2
-	.type	fcvtzu_scalar_to_vector_s_strict, at function
-fcvtzu_scalar_to_vector_s_strict:       // @fcvtzu_scalar_to_vector_s_strict
-	.cfi_startproc
-// %bb.0:
-	fcvtzu	d0, s0
-	ret
-.Lfunc_end125:
-	.size	fcvtzu_scalar_to_vector_s_strict, .Lfunc_end125-fcvtzu_scalar_to_vector_s_strict
-	.cfi_endproc
-                                        // -- End function
-	.section	".note.GNU-stack","", at progbits
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
index 8b8f23a049107..68c24f2a30709 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
@@ -4,7 +4,7 @@
 
 
 ;
-; Intriniscs (bitcast)
+; Intrinsics (bitcast)
 ;
 
 define float @fcvtas_1s1d_simd(double %A) nounwind {

>From da8e86fb8c9b2c1cfbcbf65fa332564807d1c2fc Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Thu, 15 Jan 2026 14:42:57 +0000
Subject: [PATCH 3/8] Add missing patterns and address review comments

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |   66 +-
 .../AArch64/arm64-cvt-simd-fptoi-strictfp.ll  |  575 ++++++++
 .../CodeGen/AArch64/arm64-cvt-simd-fptoi.ll   |  567 ++++----
 .../AArch64/arm64-cvt-simd-intrinsics.ll      | 1172 ++++++++++++++---
 llvm/test/CodeGen/AArch64/arm64-vcvt.ll       |    1 -
 5 files changed, 1940 insertions(+), 441 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi-strictfp.ll

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 447fd9ef66343..6a0fe9b4619c6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6563,19 +6563,44 @@ multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
             (!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
   def : Pat<(f64 (bitconvert (i64 (OpN (f32 FPR32:$Rn))))),
             (!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
+  }
+  def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))),
+            (!cast<Instruction>(INST # v1i32) FPR32:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))),
+            (!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
 
+  foreach ret_type = [v2i32, v4i32] in {
+    let Predicates = [HasFPRCVT] in {
+    def : Pat<(ret_type (scalar_to_vector (i32 (OpN (f64 FPR64:$Rn))))),
+              (INSERT_SUBREG (IMPLICIT_DEF),
+              (!cast<Instruction>(INST # SDr) FPR64:$Rn), ssub)>;
+    def : Pat<(ret_type (scalar_to_vector (i32 (OpN (f16 FPR16:$Rn))))),
+              (INSERT_SUBREG (IMPLICIT_DEF),
+              (!cast<Instruction>(INST # SHr) FPR16:$Rn), ssub)>;
+    }
+    def : Pat<(ret_type (scalar_to_vector (i32 (OpN (f32 FPR32:$Rn))))),
+              (INSERT_SUBREG (IMPLICIT_DEF),
+              (!cast<Instruction>(INST # v1i32) FPR32:$Rn), ssub)>;
+  }
+
+  let Predicates = [HasFPRCVT] in {
   def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f16 FPR16:$Rn))))),
             (!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
   def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f32 FPR32:$Rn))))),
             (!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
+  def : Pat<(v2i64 (scalar_to_vector (i64 (OpN (f16 FPR16:$Rn))))),
+            (INSERT_SUBREG (IMPLICIT_DEF),
+              (!cast<Instruction>(INST # DHr) FPR16:$Rn), dsub)>;
+  def : Pat<(v2i64 (scalar_to_vector (i64 (OpN (f32 FPR32:$Rn))))),
+            (INSERT_SUBREG (IMPLICIT_DEF),
+              (!cast<Instruction>(INST # DSr) FPR32:$Rn), dsub)>;
   }
-  def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))),
-            (!cast<Instruction>(INST # v1i32) FPR32:$Rn)>;
-  def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))),
-            (!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
 
   def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f64 FPR64:$Rn))))),
             (!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
+  def : Pat<(v2i64 (scalar_to_vector (i64 (OpN (f64 FPR64:$Rn))))),
+            (INSERT_SUBREG (IMPLICIT_DEF),
+              (!cast<Instruction>(INST # v1i64) FPR64:$Rn), dsub)>;
 }
 defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtas, "FCVTAS">;
 defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtau, "FCVTAU">;
@@ -6618,19 +6643,44 @@ multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
             (!cast<Instruction>(INST # DSr) $Rn)>;
   def : Pat<(f32 (bitconvert (i32 (round f64:$Rn)))), 
             (!cast<Instruction>(INST # SDr) $Rn)>;
+  }
+  def : Pat<(f32 (bitconvert (i32 (round f32:$Rn)))), 
+            (!cast<Instruction>(INST # v1i32) $Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (round f64:$Rn)))), 
+            (!cast<Instruction>(INST # v1i64) $Rn)>;
 
+  foreach ret_type = [v2i32, v4i32] in {
+    let Predicates = [HasFPRCVT] in {
+    def : Pat<(ret_type (scalar_to_vector (i32 (round f16:$Rn)))), 
+              (INSERT_SUBREG (IMPLICIT_DEF),
+              (!cast<Instruction>(INST # SHr) $Rn), ssub)>;
+    def : Pat<(ret_type (scalar_to_vector (i32 (round f64:$Rn)))), 
+              (INSERT_SUBREG (IMPLICIT_DEF),
+              (!cast<Instruction>(INST # SDr) $Rn), ssub)>;
+    }
+    def : Pat<(ret_type (scalar_to_vector (i32 (round f32:$Rn)))), 
+              (INSERT_SUBREG (IMPLICIT_DEF),
+              (!cast<Instruction>(INST # v1i32) $Rn), ssub)>;
+  }
+
+  let Predicates = [HasFPRCVT] in {
   def : Pat<(v1i64 (scalar_to_vector (i64 (round f16:$Rn)))), 
             (!cast<Instruction>(INST # DHr) $Rn)>;
   def : Pat<(v1i64 (scalar_to_vector (i64 (round f32:$Rn)))), 
             (!cast<Instruction>(INST # DSr) $Rn)>;
+  def : Pat<(v2i64 (scalar_to_vector (i64 (round f16:$Rn)))), 
+            (INSERT_SUBREG (IMPLICIT_DEF),
+            (!cast<Instruction>(INST # DHr) $Rn), dsub)>;
+  def : Pat<(v2i64 (scalar_to_vector (i64 (round f32:$Rn)))), 
+            (INSERT_SUBREG (IMPLICIT_DEF),
+            (!cast<Instruction>(INST # DSr) $Rn), dsub)>;
   }
-  def : Pat<(f32 (bitconvert (i32 (round f32:$Rn)))), 
-            (!cast<Instruction>(INST # v1i32) $Rn)>;
-  def : Pat<(f64 (bitconvert (i64 (round f64:$Rn)))), 
-            (!cast<Instruction>(INST # v1i64) $Rn)>;
 
   def : Pat<(v1i64 (scalar_to_vector (i64 (round f64:$Rn)))), 
             (!cast<Instruction>(INST # v1i64) $Rn)>;
+  def : Pat<(v2i64 (scalar_to_vector (i64 (round f64:$Rn)))), 
+            (INSERT_SUBREG (IMPLICIT_DEF),
+            (!cast<Instruction>(INST # v1i64) $Rn), dsub)>;
 
   let Predicates = [HasFullFP16] in {
   def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi-strictfp.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi-strictfp.ll
new file mode 100644
index 0000000000000..1afe981ea816c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi-strictfp.ll
@@ -0,0 +1,575 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-NOFPRCVT
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK
+
+;
+; FPTOI strictfp
+;
+
+define float @fptosi_i32_f16_simd(half %x)  {
+; CHECK-NOFPRCVT-LABEL: fptosi_i32_f16_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptosi_i32_f16_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, h0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict")
+  %sum = bitcast i32 %val to float
+  ret float %sum
+}
+
+define double @fptosi_i64_f16_simd(half %x)  {
+; CHECK-NOFPRCVT-LABEL: fptosi_i64_f16_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptosi_i64_f16_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict")
+  %sum = bitcast i64 %val to double
+  ret double %sum
+}
+
+define double @fptosi_i64_f32_simd(float %x)  {
+; CHECK-NOFPRCVT-LABEL: fptosi_i64_f32_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptosi_i64_f32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict")
+  %bc = bitcast i64 %val to double
+  ret double %bc
+}
+
+define float @fptosi_i32_f64_simd(double %x)  {
+; CHECK-NOFPRCVT-LABEL: fptosi_i32_f64_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptosi_i32_f64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, d0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict")
+  %bc = bitcast i32 %val to float
+  ret float %bc
+}
+
+define double @fptosi_i64_f64_simd(double %x)  {
+; CHECK-NOFPRCVT-LABEL: fptosi_i64_f64_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptosi_i64_f64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict")
+  %bc = bitcast i64 %val to double
+  ret double %bc
+}
+
+define float @fptosi_i32_f32_simd(float %x)  {
+; CHECK-NOFPRCVT-LABEL: fptosi_i32_f32_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptosi_i32_f32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict")
+  %bc = bitcast i32 %val to float
+  ret float %bc
+}
+
+define float @fptoui_i32_f16_simd(half %x)  {
+; CHECK-NOFPRCVT-LABEL: fptoui_i32_f16_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptoui_i32_f16_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, h0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict")
+  %sum = bitcast i32 %val to float
+  ret float %sum
+}
+
+define double @fptoui_i64_f16_simd(half %x)  {
+; CHECK-NOFPRCVT-LABEL: fptoui_i64_f16_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptoui_i64_f16_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, h0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict")
+  %sum = bitcast i64 %val to double
+  ret double %sum
+}
+
+define double @fptoui_i64_f32_simd(float %x)  {
+; CHECK-NOFPRCVT-LABEL: fptoui_i64_f32_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptoui_i64_f32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, s0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict")
+  %bc = bitcast i64 %val to double
+  ret double %bc
+}
+
+define float @fptoui_i32_f64_simd(double %x)  {
+; CHECK-NOFPRCVT-LABEL: fptoui_i32_f64_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptoui_i32_f64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, d0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict")
+  %bc = bitcast i32 %val to float
+  ret float %bc
+}
+
+define double @fptoui_i64_f64_simd(double %x)  {
+; CHECK-NOFPRCVT-LABEL: fptoui_i64_f64_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptoui_i64_f64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, d0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict")
+  %bc = bitcast i64 %val to double
+  ret double %bc
+}
+
+define float @fptoui_i32_f32_simd(float %x)  {
+; CHECK-NOFPRCVT-LABEL: fptoui_i32_f32_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptoui_i32_f32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, s0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict")
+  %bc = bitcast i32 %val to float
+  ret float %bc
+}
+
+;
+; FPTOI scalar_to_vector strictfp
+;
+
+define <2 x i32> @fcvtzs_v2i32_from_f16_scalar_to_vector_simd_strict(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i32_from_f16_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_v2i32_from_f16_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, h0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict")
+  %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
+  ret <2 x i32> %fcvtzs_vector
+}
+
+define <2 x i32> @fcvtzs_v2i32_from_f32_scalar_to_vector_simd_strict(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i32_from_f32_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_v2i32_from_f32_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %a, metadata !"fpexcept.strict")
+  %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
+  ret <2 x i32> %fcvtzs_vector
+}
+
+define <2 x i32> @fcvtzs_v2i32_from_f64_scalar_to_vector_simd_strict(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i32_from_f64_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_v2i32_from_f64_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, d0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict")
+  %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
+  ret <2 x i32> %fcvtzs_vector
+}
+
+define <4 x i32> @fcvtzs_v4i32_from_f16_scalar_to_vector_simd_strict(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v4i32_from_f16_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_v4i32_from_f16_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, h0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict")
+  %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
+  ret <4 x i32> %fcvtzs_vector
+}
+
+define <4 x i32> @fcvtzs_v4i32_from_f32_scalar_to_vector_simd_strict(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v4i32_from_f32_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_v4i32_from_f32_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %a, metadata !"fpexcept.strict")
+  %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
+  ret <4 x i32> %fcvtzs_vector
+}
+
+define <4 x i32> @fcvtzs_v4i32_from_f64_scalar_to_vector_simd_strict(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v4i32_from_f64_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_v4i32_from_f64_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, d0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict")
+  %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
+  ret <4 x i32> %fcvtzs_vector
+}
+
+define <1 x i64> @fcvtzs_v1i64_from_f16_scalar_to_vector_simd_strict(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v1i64_from_f16_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_v1i64_from_f16_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %a, metadata !"fpexcept.strict")
+  %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
+  ret <1 x i64> %fcvtzs_vector
+}
+
+define <1 x i64> @fcvtzs_v1i64_from_f32_scalar_to_vector_simd_strict(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v1i64_from_f32_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_v1i64_from_f32_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %a, metadata !"fpexcept.strict")
+  %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
+  ret <1 x i64> %fcvtzs_vector
+}
+
+define <1 x i64> @fcvtzs_v1i64_from_f64_scalar_to_vector_simd_strict(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v1i64_from_f64_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_v1i64_from_f64_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %a, metadata !"fpexcept.strict")
+  %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
+  ret <1 x i64> %fcvtzs_vector
+}
+
+define <2 x i64> @fcvtzs_v2i64_from_f16_scalar_to_vector_simd_strict(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i64_from_f16_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_v2i64_from_f16_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %a, metadata !"fpexcept.strict")
+  %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
+  ret <2 x i64> %fcvtzs_vector
+}
+
+define <2 x i64> @fcvtzs_v2i64_from_f32_scalar_to_vector_simd_strict(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i64_from_f32_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_v2i64_from_f32_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %a, metadata !"fpexcept.strict")
+  %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
+  ret <2 x i64> %fcvtzs_vector
+}
+
+define <2 x i64> @fcvtzs_v2i64_from_f64_scalar_to_vector_simd_strict(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i64_from_f64_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_v2i64_from_f64_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %a, metadata !"fpexcept.strict")
+  %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
+  ret <2 x i64> %fcvtzs_vector
+}
+
+define <2 x i32> @fcvtzu_v2i32_from_f16_scalar_to_vector_simd_strict(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i32_from_f16_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_v2i32_from_f16_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, h0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict")
+  %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
+  ret <2 x i32> %fcvtzu_vector
+}
+
+define <2 x i32> @fcvtzu_v2i32_from_f32_scalar_to_vector_simd_strict(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i32_from_f32_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_v2i32_from_f32_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, s0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %a, metadata !"fpexcept.strict")
+  %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
+  ret <2 x i32> %fcvtzu_vector
+}
+
+define <2 x i32> @fcvtzu_v2i32_from_f64_scalar_to_vector_simd_strict(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i32_from_f64_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_v2i32_from_f64_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, d0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict")
+  %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
+  ret <2 x i32> %fcvtzu_vector
+}
+
+define <4 x i32> @fcvtzu_v4i32_from_f16_scalar_to_vector_simd_strict(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v4i32_from_f16_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_v4i32_from_f16_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, h0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict")
+  %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
+  ret <4 x i32> %fcvtzu_vector
+}
+
+define <4 x i32> @fcvtzu_v4i32_from_f32_scalar_to_vector_simd_strict(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v4i32_from_f32_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_v4i32_from_f32_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, s0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %a, metadata !"fpexcept.strict")
+  %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
+  ret <4 x i32> %fcvtzu_vector
+}
+
+define <4 x i32> @fcvtzu_v4i32_from_f64_scalar_to_vector_simd_strict(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v4i32_from_f64_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_v4i32_from_f64_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, d0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict")
+  %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
+  ret <4 x i32> %fcvtzu_vector
+}
+
+define <1 x i64> @fcvtzu_v1i64_from_f16_scalar_to_vector_simd_strict(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v1i64_from_f16_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_v1i64_from_f16_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, h0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %a, metadata !"fpexcept.strict")
+  %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
+  ret <1 x i64> %fcvtzu_vector
+}
+
+define <1 x i64> @fcvtzu_v1i64_from_f32_scalar_to_vector_simd_strict(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v1i64_from_f32_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_v1i64_from_f32_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, s0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %a, metadata !"fpexcept.strict")
+  %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
+  ret <1 x i64> %fcvtzu_vector
+}
+
+define <1 x i64> @fcvtzu_v1i64_from_f64_scalar_to_vector_simd_strict(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v1i64_from_f64_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_v1i64_from_f64_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, d0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %a, metadata !"fpexcept.strict")
+  %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
+  ret <1 x i64> %fcvtzu_vector
+}
+
+define <2 x i64> @fcvtzu_v2i64_from_f16_scalar_to_vector_simd_strict(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i64_from_f16_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_v2i64_from_f16_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, h0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %a, metadata !"fpexcept.strict")
+  %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
+  ret <2 x i64> %fcvtzu_vector
+}
+
+define <2 x i64> @fcvtzu_v2i64_from_f32_scalar_to_vector_simd_strict(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i64_from_f32_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_v2i64_from_f32_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, s0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %a, metadata !"fpexcept.strict")
+  %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
+  ret <2 x i64> %fcvtzu_vector
+}
+
+define <2 x i64> @fcvtzu_v2i64_from_f64_scalar_to_vector_simd_strict(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i64_from_f64_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_v2i64_from_f64_scalar_to_vector_simd_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, d0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %a, metadata !"fpexcept.strict")
+  %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
+  ret <2 x i64> %fcvtzu_vector
+}
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
index ebaca00d2cdb9..52c35ce872b61 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
@@ -3,23 +3,6 @@
 ; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK
 ; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1  | FileCheck %s --check-prefixes=CHECK
 
-; CHECK-GI: warning: Instruction selection used fallback path for fptosi_i32_f16_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f16_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f32_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f64_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f64_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f32_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f16_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f16_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f32_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f64_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f64_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f32_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzs_scalar_to_vector_h_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzs_scalar_to_vector_s_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzu_scalar_to_vector_h_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzu_scalar_to_vector_s_strict
-
 ;
 ; FPTOI
 ;
@@ -215,200 +198,6 @@ define float @test_fptoui_f32_i32_simd(float %a)  {
 }
 
 
-;
-; FPTOI strictfp
-;
-
-define float @fptosi_i32_f16_simd(half %x)  {
-; CHECK-NOFPRCVT-LABEL: fptosi_i32_f16_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, h0
-; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fptosi_i32_f16_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs s0, h0
-; CHECK-NEXT:    ret
-  %val = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict")
-  %sum = bitcast i32 %val to float
-  ret float %sum
-}
-
-define double @fptosi_i64_f16_simd(half %x)  {
-; CHECK-NOFPRCVT-LABEL: fptosi_i64_f16_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, h0
-; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fptosi_i64_f16_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs d0, h0
-; CHECK-NEXT:    ret
-  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict")
-  %sum = bitcast i64 %val to double
-  ret double %sum
-}
-
-define double @fptosi_i64_f32_simd(float %x)  {
-; CHECK-NOFPRCVT-LABEL: fptosi_i64_f32_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, s0
-; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fptosi_i64_f32_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs d0, s0
-; CHECK-NEXT:    ret
-  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict")
-  %bc = bitcast i64 %val to double
-  ret double %bc
-}
-
-define float @fptosi_i32_f64_simd(double %x)  {
-; CHECK-NOFPRCVT-LABEL: fptosi_i32_f64_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, d0
-; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fptosi_i32_f64_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs s0, d0
-; CHECK-NEXT:    ret
-  %val = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict")
-  %bc = bitcast i32 %val to float
-  ret float %bc
-}
-
-define double @fptosi_i64_f64_simd(double %x)  {
-; CHECK-NOFPRCVT-LABEL: fptosi_i64_f64_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs d0, d0
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fptosi_i64_f64_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs d0, d0
-; CHECK-NEXT:    ret
-  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict")
-  %bc = bitcast i64 %val to double
-  ret double %bc
-}
-
-define float @fptosi_i32_f32_simd(float %x)  {
-; CHECK-NOFPRCVT-LABEL: fptosi_i32_f32_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs s0, s0
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fptosi_i32_f32_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs s0, s0
-; CHECK-NEXT:    ret
-  %val = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict")
-  %bc = bitcast i32 %val to float
-  ret float %bc
-}
-
-
-
-define float @fptoui_i32_f16_simd(half %x)  {
-; CHECK-NOFPRCVT-LABEL: fptoui_i32_f16_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, h0
-; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fptoui_i32_f16_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu s0, h0
-; CHECK-NEXT:    ret
-  %val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict")
-  %sum = bitcast i32 %val to float
-  ret float %sum
-}
-
-define double @fptoui_i64_f16_simd(half %x)  {
-; CHECK-NOFPRCVT-LABEL: fptoui_i64_f16_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, h0
-; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fptoui_i64_f16_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu d0, h0
-; CHECK-NEXT:    ret
-  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict")
-  %sum = bitcast i64 %val to double
-  ret double %sum
-}
-
-define double @fptoui_i64_f32_simd(float %x)  {
-; CHECK-NOFPRCVT-LABEL: fptoui_i64_f32_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, s0
-; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fptoui_i64_f32_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu d0, s0
-; CHECK-NEXT:    ret
-  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict")
-  %bc = bitcast i64 %val to double
-  ret double %bc
-}
-
-define float @fptoui_i32_f64_simd(double %x)  {
-; CHECK-NOFPRCVT-LABEL: fptoui_i32_f64_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, d0
-; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fptoui_i32_f64_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu s0, d0
-; CHECK-NEXT:    ret
-  %val = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict")
-  %bc = bitcast i32 %val to float
-  ret float %bc
-}
-
-define double @fptoui_i64_f64_simd(double %x)  {
-; CHECK-NOFPRCVT-LABEL: fptoui_i64_f64_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu d0, d0
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fptoui_i64_f64_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu d0, d0
-; CHECK-NEXT:    ret
-  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict")
-  %bc = bitcast i64 %val to double
-  ret double %bc
-}
-
-define float @fptoui_i32_f32_simd(float %x)  {
-; CHECK-NOFPRCVT-LABEL: fptoui_i32_f32_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu s0, s0
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fptoui_i32_f32_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu s0, s0
-; CHECK-NEXT:    ret
-  %val = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict")
-  %bc = bitcast i32 %val to float
-  ret float %bc
-}
-
 ;
 ; FPTOI rounding
 ;
@@ -1950,164 +1739,378 @@ define double @fcvtzu_dd_simd(double %a) {
 ; FPTOI scalar_to_vector
 ;
 
-define <1 x i64> @fcvtzs_scalar_to_vector_h(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_h:
+define <2 x i32> @fcvtzs_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, h0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = fptosi half %a to i32
+  %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
+  ret <2 x i32> %fcvtzs_vector
+}
+
+define <2 x i32> @fcvtzs_v2i32_from_f32_scalar_to_vector_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = fptosi float %a to i32
+  %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
+  ret <2 x i32> %fcvtzs_vector
+}
+
+define <2 x i32> @fcvtzs_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, d0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = fptosi double %a to i32
+  %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
+  ret <2 x i32> %fcvtzs_vector
+}
+
+define <4 x i32> @fcvtzs_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, h0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = fptosi half %a to i32
+  %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
+  ret <4 x i32> %fcvtzs_vector
+}
+
+define <4 x i32> @fcvtzs_v4i32_from_f32_scalar_to_vector_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = fptosi float %a to i32
+  %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
+  ret <4 x i32> %fcvtzs_vector
+}
+
+define <4 x i32> @fcvtzs_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, d0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = fptosi double %a to i32
+  %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
+  ret <4 x i32> %fcvtzs_vector
+}
+
+define <1 x i64> @fcvtzs_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v1i64_from_f16_scalar_to_vector_simd:
 ; CHECK-NOFPRCVT:       // %bb.0:
 ; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, h0
 ; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
 ; CHECK-NOFPRCVT-NEXT:    ret
 ;
-; CHECK-LABEL: fcvtzs_scalar_to_vector_h:
+; CHECK-LABEL: fcvtzs_v1i64_from_f16_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtzs d0, h0
 ; CHECK-NEXT:    ret
-  %val = fptosi half %a to i64
-  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
-  ret <1 x i64> %vec
+  %fcvtzs_scalar = fptosi half %a to i64
+  %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
+  ret <1 x i64> %fcvtzs_vector
 }
 
-define <1 x i64> @fcvtzs_scalar_to_vector_s(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_s:
+define <1 x i64> @fcvtzs_v1i64_from_f32_scalar_to_vector_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v1i64_from_f32_scalar_to_vector_simd:
 ; CHECK-NOFPRCVT:       // %bb.0:
 ; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, s0
 ; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
 ; CHECK-NOFPRCVT-NEXT:    ret
 ;
-; CHECK-LABEL: fcvtzs_scalar_to_vector_s:
+; CHECK-LABEL: fcvtzs_v1i64_from_f32_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtzs d0, s0
 ; CHECK-NEXT:    ret
-  %val = fptosi float %a to i64
-  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
-  ret <1 x i64> %vec
+  %fcvtzs_scalar = fptosi float %a to i64
+  %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
+  ret <1 x i64> %fcvtzs_vector
 }
 
-define <1 x i64> @fcvtzs_scalar_to_vector_d(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_d:
+define <1 x i64> @fcvtzs_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v1i64_from_f64_scalar_to_vector_simd:
 ; CHECK-NOFPRCVT:       // %bb.0:
 ; CHECK-NOFPRCVT-NEXT:    fcvtzs d0, d0
 ; CHECK-NOFPRCVT-NEXT:    ret
 ;
-; CHECK-LABEL: fcvtzs_scalar_to_vector_d:
+; CHECK-LABEL: fcvtzs_v1i64_from_f64_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtzs d0, d0
 ; CHECK-NEXT:    ret
-  %val = fptosi double %a to i64
-  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
-  ret <1 x i64> %vec
+  %fcvtzs_scalar = fptosi double %a to i64
+  %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
+  ret <1 x i64> %fcvtzs_vector
 }
 
-define <1 x i64> @fcvtzu_scalar_to_vector_h(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_h:
+define <2 x i64> @fcvtzs_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i64_from_f16_scalar_to_vector_simd:
 ; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, h0
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, h0
 ; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
 ; CHECK-NOFPRCVT-NEXT:    ret
 ;
-; CHECK-LABEL: fcvtzu_scalar_to_vector_h:
+; CHECK-LABEL: fcvtzs_v2i64_from_f16_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu d0, h0
+; CHECK-NEXT:    fcvtzs d0, h0
 ; CHECK-NEXT:    ret
-  %val = fptoui half %a to i64
-  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
-  ret <1 x i64> %vec
+  %fcvtzs_scalar = fptosi half %a to i64
+  %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
+  ret <2 x i64> %fcvtzs_vector
 }
 
-define <1 x i64> @fcvtzu_scalar_to_vector_s(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_s:
+define <2 x i64> @fcvtzs_v2i64_from_f32_scalar_to_vector_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i64_from_f32_scalar_to_vector_simd:
 ; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, s0
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, s0
 ; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
 ; CHECK-NOFPRCVT-NEXT:    ret
 ;
-; CHECK-LABEL: fcvtzu_scalar_to_vector_s:
+; CHECK-LABEL: fcvtzs_v2i64_from_f32_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu d0, s0
+; CHECK-NEXT:    fcvtzs d0, s0
 ; CHECK-NEXT:    ret
-  %val = fptoui float %a to i64
-  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
-  ret <1 x i64> %vec
+  %fcvtzs_scalar = fptosi float %a to i64
+  %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
+  ret <2 x i64> %fcvtzs_vector
 }
 
-define <1 x i64> @fcvtzu_scalar_to_vector_d(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_d:
+define <2 x i64> @fcvtzs_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i64_from_f64_scalar_to_vector_simd:
 ; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu d0, d0
+; CHECK-NOFPRCVT-NEXT:    fcvtzs d0, d0
 ; CHECK-NOFPRCVT-NEXT:    ret
 ;
-; CHECK-LABEL: fcvtzu_scalar_to_vector_d:
+; CHECK-LABEL: fcvtzs_v2i64_from_f64_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu d0, d0
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = fptosi double %a to i64
+  %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
+  ret <2 x i64> %fcvtzs_vector
+}
+
+define <2 x i32> @fcvtzu_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, h0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = fptoui half %a to i32
+  %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
+  ret <2 x i32> %fcvtzu_vector
+}
+
+define <2 x i32> @fcvtzu_v2i32_from_f32_scalar_to_vector_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, s0
 ; CHECK-NEXT:    ret
-  %val = fptoui double %a to i64
-  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
-  ret <1 x i64> %vec
+  %fcvtzu_scalar = fptoui float %a to i32
+  %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
+  ret <2 x i32> %fcvtzu_vector
 }
 
+define <2 x i32> @fcvtzu_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
 ;
-; FPTOI scalar_to_vector strictfp
+; CHECK-LABEL: fcvtzu_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, d0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = fptoui double %a to i32
+  %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
+  ret <2 x i32> %fcvtzu_vector
+}
+
+define <4 x i32> @fcvtzu_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
 ;
+; CHECK-LABEL: fcvtzu_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, h0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = fptoui half %a to i32
+  %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
+  ret <4 x i32> %fcvtzu_vector
+}
 
-define <1 x i64> @fcvtzs_scalar_to_vector_h_strict(half %x) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_h_strict:
+define <4 x i32> @fcvtzu_v4i32_from_f32_scalar_to_vector_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v4i32_from_f32_scalar_to_vector_simd:
 ; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, h0
+; CHECK-NOFPRCVT-NEXT:    fcvtzu s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, s0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = fptoui float %a to i32
+  %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
+  ret <4 x i32> %fcvtzu_vector
+}
+
+define <4 x i32> @fcvtzu_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, d0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = fptoui double %a to i32
+  %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
+  ret <4 x i32> %fcvtzu_vector
+}
+
+define <1 x i64> @fcvtzu_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, h0
 ; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
 ; CHECK-NOFPRCVT-NEXT:    ret
 ;
-; CHECK-LABEL: fcvtzs_scalar_to_vector_h_strict:
+; CHECK-LABEL: fcvtzu_v1i64_from_f16_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    fcvtzu d0, h0
 ; CHECK-NEXT:    ret
-  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict")
-  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
-  ret <1 x i64> %vec
+  %fcvtzu_scalar = fptoui half %a to i64
+  %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
+  ret <1 x i64> %fcvtzu_vector
 }
 
-define <1 x i64> @fcvtzs_scalar_to_vector_s_strict(float %x) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_s_strict:
+define <1 x i64> @fcvtzu_v1i64_from_f32_scalar_to_vector_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v1i64_from_f32_scalar_to_vector_simd:
 ; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, s0
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, s0
 ; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
 ; CHECK-NOFPRCVT-NEXT:    ret
 ;
-; CHECK-LABEL: fcvtzs_scalar_to_vector_s_strict:
+; CHECK-LABEL: fcvtzu_v1i64_from_f32_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    fcvtzu d0, s0
 ; CHECK-NEXT:    ret
-  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict")
-  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
-  ret <1 x i64> %vec
+  %fcvtzu_scalar = fptoui float %a to i64
+  %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
+  ret <1 x i64> %fcvtzu_vector
 }
 
-define <1 x i64> @fcvtzu_scalar_to_vector_h_strict(half %x) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_h_strict:
+define <1 x i64> @fcvtzu_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, d0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = fptoui double %a to i64
+  %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
+  ret <1 x i64> %fcvtzu_vector
+}
+
+define <2 x i64> @fcvtzu_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i64_from_f16_scalar_to_vector_simd:
 ; CHECK-NOFPRCVT:       // %bb.0:
 ; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, h0
 ; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
 ; CHECK-NOFPRCVT-NEXT:    ret
 ;
-; CHECK-LABEL: fcvtzu_scalar_to_vector_h_strict:
+; CHECK-LABEL: fcvtzu_v2i64_from_f16_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtzu d0, h0
 ; CHECK-NEXT:    ret
-  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict")
-  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
-  ret <1 x i64> %vec
+  %fcvtzu_scalar = fptoui half %a to i64
+  %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
+  ret <2 x i64> %fcvtzu_vector
 }
 
-define <1 x i64> @fcvtzu_scalar_to_vector_s_strict(float %x) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_s_strict:
+define <2 x i64> @fcvtzu_v2i64_from_f32_scalar_to_vector_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i64_from_f32_scalar_to_vector_simd:
 ; CHECK-NOFPRCVT:       // %bb.0:
 ; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, s0
 ; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
 ; CHECK-NOFPRCVT-NEXT:    ret
 ;
-; CHECK-LABEL: fcvtzu_scalar_to_vector_s_strict:
+; CHECK-LABEL: fcvtzu_v2i64_from_f32_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtzu d0, s0
 ; CHECK-NEXT:    ret
-  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict")
-  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
-  ret <1 x i64> %vec
+  %fcvtzu_scalar = fptoui float %a to i64
+  %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
+  ret <2 x i64> %fcvtzu_vector
+}
+
+define <2 x i64> @fcvtzu_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i64_from_f64_scalar_to_vector_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_v2i64_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, d0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = fptoui double %a to i64
+  %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
+  ret <2 x i64> %fcvtzu_vector
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
index 68c24f2a30709..55af566b9f4c1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
@@ -612,330 +612,1202 @@ define  float @fcvtzu_1s1s_simd(float %a) {
 ; Intriniscs (scalar_to_vector)
 ;
 
-define <1 x i64> @fcvtas_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtas_1d1s_scalar_to_vector_simd:
+define <2 x i32> @fcvtas_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtas_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, h0
+; CHECK-NEXT:    ret
+  %fcvtas_scalar = tail call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a)
+  %fcvtas_vector = insertelement <2 x i32> poison, i32 %fcvtas_scalar, i32 0
+  ret <2 x i32> %fcvtas_vector
+}
+
+define <2 x i32> @fcvtas_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtas_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, s0
+; CHECK-NEXT:    ret
+  %fcvtas_scalar = call i32 @llvm.aarch64.neon.fcvtas.i32.f32(float %A)
+  %fcvtas_vector = insertelement <2 x i32> poison, i32 %fcvtas_scalar, i32 0
+  ret <2 x i32> %fcvtas_vector
+}
+
+define <2 x i32> @fcvtas_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtas_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, d0
+; CHECK-NEXT:    ret
+  %fcvtas_scalar = tail call i32 @llvm.aarch64.neon.fcvtas.i32.f64(double %a)
+  %fcvtas_vector = insertelement <2 x i32> poison, i32 %fcvtas_scalar, i32 0
+  ret <2 x i32> %fcvtas_vector
+}
+
+define <4 x i32> @fcvtas_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtas_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, h0
+; CHECK-NEXT:    ret
+  %fcvtas_scalar = tail call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a)
+  %fcvtas_vector = insertelement <4 x i32> poison, i32 %fcvtas_scalar, i32 0
+  ret <4 x i32> %fcvtas_vector
+}
+
+define <4 x i32> @fcvtas_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtas_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, s0
+; CHECK-NEXT:    ret
+  %fcvtas_scalar = call i32 @llvm.aarch64.neon.fcvtas.i32.f32(float %A)
+  %fcvtas_vector = insertelement <4 x i32> poison, i32 %fcvtas_scalar, i32 0
+  ret <4 x i32> %fcvtas_vector
+}
+
+define <4 x i32> @fcvtas_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtas_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, d0
+; CHECK-NEXT:    ret
+  %fcvtas_scalar = tail call i32 @llvm.aarch64.neon.fcvtas.i32.f64(double %a)
+  %fcvtas_vector = insertelement <4 x i32> poison, i32 %fcvtas_scalar, i32 0
+  ret <4 x i32> %fcvtas_vector
+}
+
+define <1 x i64> @fcvtas_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtas_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, h0
+; CHECK-NEXT:    ret
+  %fcvtas_scalar = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f16(half %a)
+  %fcvtas_vector = insertelement <1 x i64> poison, i64 %fcvtas_scalar, i32 0
+  ret <1 x i64> %fcvtas_vector
+}
+
+define <1 x i64> @fcvtas_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtas_v1i64_from_f32_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtas d0, s0
 ; CHECK-NEXT:    ret
-  %i = call i64 @llvm.aarch64.neon.fcvtas.i64.f32(float %A)
-  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
-  ret <1 x i64> %vec
+  %fcvtas_scalar = call i64 @llvm.aarch64.neon.fcvtas.i64.f32(float %A)
+  %fcvtas_vector = insertelement <1 x i64> poison, i64 %fcvtas_scalar, i32 0
+  ret <1 x i64> %fcvtas_vector
 }
 
+define <1 x i64> @fcvtas_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtas_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, d0
+; CHECK-NEXT:    ret
+  %fcvtas_scalar = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double %a)
+  %fcvtas_vector = insertelement <1 x i64> poison, i64 %fcvtas_scalar, i32 0
+  ret <1 x i64> %fcvtas_vector
+}
 
-define  <1 x i64> @fcvtas_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtas_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtas_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtas_v2i64_from_f16_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtas d0, h0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f16(half %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
-  ret <1 x i64> %vec
+  %fcvtas_scalar = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f16(half %a)
+  %fcvtas_vector = insertelement <2 x i64> poison, i64 %fcvtas_scalar, i32 0
+  ret <2 x i64> %fcvtas_vector
+}
+
+define <2 x i64> @fcvtas_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtas_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, s0
+; CHECK-NEXT:    ret
+  %fcvtas_scalar = call i64 @llvm.aarch64.neon.fcvtas.i64.f32(float %A)
+  %fcvtas_vector = insertelement <2 x i64> poison, i64 %fcvtas_scalar, i32 0
+  ret <2 x i64> %fcvtas_vector
 }
 
-define  <1 x i64> @fcvtas_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtas_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtas_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtas_v2i64_from_f64_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtas d0, d0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
-  ret <1 x i64> %vec
+  %fcvtas_scalar = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double %a)
+  %fcvtas_vector = insertelement <2 x i64> poison, i64 %fcvtas_scalar, i32 0
+  ret <2 x i64> %fcvtas_vector
 }
 
+define <2 x i32> @fcvtau_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtau_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau s0, h0
+; CHECK-NEXT:    ret
+  %fcvtau_scalar = tail call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a)
+  %fcvtau_vector = insertelement <2 x i32> poison, i32 %fcvtau_scalar, i32 0
+  ret <2 x i32> %fcvtau_vector
+}
 
+define <2 x i32> @fcvtau_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtau_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau s0, s0
+; CHECK-NEXT:    ret
+  %fcvtau_scalar = call i32 @llvm.aarch64.neon.fcvtau.i32.f32(float %A)
+  %fcvtau_vector = insertelement <2 x i32> poison, i32 %fcvtau_scalar, i32 0
+  ret <2 x i32> %fcvtau_vector
+}
 
-define <1 x i64> @fcvtau_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtau_1d1s_scalar_to_vector_simd:
+define <2 x i32> @fcvtau_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtau_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau s0, d0
+; CHECK-NEXT:    ret
+  %fcvtau_scalar = tail call i32 @llvm.aarch64.neon.fcvtau.i32.f64(double %a)
+  %fcvtau_vector = insertelement <2 x i32> poison, i32 %fcvtau_scalar, i32 0
+  ret <2 x i32> %fcvtau_vector
+}
+
+define <4 x i32> @fcvtau_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtau_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau s0, h0
+; CHECK-NEXT:    ret
+  %fcvtau_scalar = tail call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a)
+  %fcvtau_vector = insertelement <4 x i32> poison, i32 %fcvtau_scalar, i32 0
+  ret <4 x i32> %fcvtau_vector
+}
+
+define <4 x i32> @fcvtau_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtau_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau s0, s0
+; CHECK-NEXT:    ret
+  %fcvtau_scalar = call i32 @llvm.aarch64.neon.fcvtau.i32.f32(float %A)
+  %fcvtau_vector = insertelement <4 x i32> poison, i32 %fcvtau_scalar, i32 0
+  ret <4 x i32> %fcvtau_vector
+}
+
+define <4 x i32> @fcvtau_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtau_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau s0, d0
+; CHECK-NEXT:    ret
+  %fcvtau_scalar = tail call i32 @llvm.aarch64.neon.fcvtau.i32.f64(double %a)
+  %fcvtau_vector = insertelement <4 x i32> poison, i32 %fcvtau_scalar, i32 0
+  ret <4 x i32> %fcvtau_vector
+}
+
+define <1 x i64> @fcvtau_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtau_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau d0, h0
+; CHECK-NEXT:    ret
+  %fcvtau_scalar = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f16(half %a)
+  %fcvtau_vector = insertelement <1 x i64> poison, i64 %fcvtau_scalar, i32 0
+  ret <1 x i64> %fcvtau_vector
+}
+
+define <1 x i64> @fcvtau_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtau_v1i64_from_f32_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtau d0, s0
 ; CHECK-NEXT:    ret
-  %i = call i64 @llvm.aarch64.neon.fcvtau.i64.f32(float %A)
-  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
-  ret <1 x i64> %vec
+  %fcvtau_scalar = call i64 @llvm.aarch64.neon.fcvtau.i64.f32(float %A)
+  %fcvtau_vector = insertelement <1 x i64> poison, i64 %fcvtau_scalar, i32 0
+  ret <1 x i64> %fcvtau_vector
 }
 
+define <1 x i64> @fcvtau_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtau_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau d0, d0
+; CHECK-NEXT:    ret
+  %fcvtau_scalar = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double %a)
+  %fcvtau_vector = insertelement <1 x i64> poison, i64 %fcvtau_scalar, i32 0
+  ret <1 x i64> %fcvtau_vector
+}
 
-define  <1 x i64> @fcvtau_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtau_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtau_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtau_v2i64_from_f16_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtau d0, h0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f16(half %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
-  ret <1 x i64> %vec
+  %fcvtau_scalar = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f16(half %a)
+  %fcvtau_vector = insertelement <2 x i64> poison, i64 %fcvtau_scalar, i32 0
+  ret <2 x i64> %fcvtau_vector
 }
 
-define  <1 x i64> @fcvtau_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtau_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtau_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtau_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau d0, s0
+; CHECK-NEXT:    ret
+  %fcvtau_scalar = call i64 @llvm.aarch64.neon.fcvtau.i64.f32(float %A)
+  %fcvtau_vector = insertelement <2 x i64> poison, i64 %fcvtau_scalar, i32 0
+  ret <2 x i64> %fcvtau_vector
+}
+
+define <2 x i64> @fcvtau_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtau_v2i64_from_f64_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtau d0, d0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
-  ret <1 x i64> %vec
+  %fcvtau_scalar = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double %a)
+  %fcvtau_vector = insertelement <2 x i64> poison, i64 %fcvtau_scalar, i32 0
+  ret <2 x i64> %fcvtau_vector
+}
+
+define <2 x i32> @fcvtms_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtms_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms s0, h0
+; CHECK-NEXT:    ret
+  %fcvtms_scalar = tail call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a)
+  %fcvtms_vector = insertelement <2 x i32> poison, i32 %fcvtms_scalar, i32 0
+  ret <2 x i32> %fcvtms_vector
+}
+
+define <2 x i32> @fcvtms_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtms_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms s0, s0
+; CHECK-NEXT:    ret
+  %fcvtms_scalar = call i32 @llvm.aarch64.neon.fcvtms.i32.f32(float %A)
+  %fcvtms_vector = insertelement <2 x i32> poison, i32 %fcvtms_scalar, i32 0
+  ret <2 x i32> %fcvtms_vector
+}
+
+define <2 x i32> @fcvtms_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtms_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms s0, d0
+; CHECK-NEXT:    ret
+  %fcvtms_scalar = tail call i32 @llvm.aarch64.neon.fcvtms.i32.f64(double %a)
+  %fcvtms_vector = insertelement <2 x i32> poison, i32 %fcvtms_scalar, i32 0
+  ret <2 x i32> %fcvtms_vector
+}
+
+define <4 x i32> @fcvtms_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtms_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms s0, h0
+; CHECK-NEXT:    ret
+  %fcvtms_scalar = tail call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a)
+  %fcvtms_vector = insertelement <4 x i32> poison, i32 %fcvtms_scalar, i32 0
+  ret <4 x i32> %fcvtms_vector
+}
+
+define <4 x i32> @fcvtms_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtms_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms s0, s0
+; CHECK-NEXT:    ret
+  %fcvtms_scalar = call i32 @llvm.aarch64.neon.fcvtms.i32.f32(float %A)
+  %fcvtms_vector = insertelement <4 x i32> poison, i32 %fcvtms_scalar, i32 0
+  ret <4 x i32> %fcvtms_vector
 }
 
+define <4 x i32> @fcvtms_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtms_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms s0, d0
+; CHECK-NEXT:    ret
+  %fcvtms_scalar = tail call i32 @llvm.aarch64.neon.fcvtms.i32.f64(double %a)
+  %fcvtms_vector = insertelement <4 x i32> poison, i32 %fcvtms_scalar, i32 0
+  ret <4 x i32> %fcvtms_vector
+}
 
+define <1 x i64> @fcvtms_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtms_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms d0, h0
+; CHECK-NEXT:    ret
+  %fcvtms_scalar = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a)
+  %fcvtms_vector = insertelement <1 x i64> poison, i64 %fcvtms_scalar, i32 0
+  ret <1 x i64> %fcvtms_vector
+}
 
-define <1 x i64> @fcvtms_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtms_1d1s_scalar_to_vector_simd:
+define <1 x i64> @fcvtms_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtms_v1i64_from_f32_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtms d0, s0
 ; CHECK-NEXT:    ret
-  %i = call i64 @llvm.aarch64.neon.fcvtms.i64.f32(float %A)
-  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
-  ret <1 x i64> %vec
+  %fcvtms_scalar = call i64 @llvm.aarch64.neon.fcvtms.i64.f32(float %A)
+  %fcvtms_vector = insertelement <1 x i64> poison, i64 %fcvtms_scalar, i32 0
+  ret <1 x i64> %fcvtms_vector
 }
 
+define <1 x i64> @fcvtms_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtms_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms d0, d0
+; CHECK-NEXT:    ret
+  %fcvtms_scalar = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double %a)
+  %fcvtms_vector = insertelement <1 x i64> poison, i64 %fcvtms_scalar, i32 0
+  ret <1 x i64> %fcvtms_vector
+}
 
-define  <1 x i64> @fcvtms_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtms_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtms_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtms_v2i64_from_f16_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtms d0, h0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
-  ret <1 x i64> %vec
+  %fcvtms_scalar = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a)
+  %fcvtms_vector = insertelement <2 x i64> poison, i64 %fcvtms_scalar, i32 0
+  ret <2 x i64> %fcvtms_vector
+}
+
+define <2 x i64> @fcvtms_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtms_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms d0, s0
+; CHECK-NEXT:    ret
+  %fcvtms_scalar = call i64 @llvm.aarch64.neon.fcvtms.i64.f32(float %A)
+  %fcvtms_vector = insertelement <2 x i64> poison, i64 %fcvtms_scalar, i32 0
+  ret <2 x i64> %fcvtms_vector
 }
 
-define  <1 x i64> @fcvtms_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtms_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtms_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtms_v2i64_from_f64_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtms d0, d0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
-  ret <1 x i64> %vec
+  %fcvtms_scalar = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double %a)
+  %fcvtms_vector = insertelement <2 x i64> poison, i64 %fcvtms_scalar, i32 0
+  ret <2 x i64> %fcvtms_vector
 }
 
+define <2 x i32> @fcvtmu_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtmu_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu s0, h0
+; CHECK-NEXT:    ret
+  %fcvtmu_scalar = tail call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a)
+  %fcvtmu_vector = insertelement <2 x i32> poison, i32 %fcvtmu_scalar, i32 0
+  ret <2 x i32> %fcvtmu_vector
+}
 
+define <2 x i32> @fcvtmu_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtmu_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu s0, s0
+; CHECK-NEXT:    ret
+  %fcvtmu_scalar = call i32 @llvm.aarch64.neon.fcvtmu.i32.f32(float %A)
+  %fcvtmu_vector = insertelement <2 x i32> poison, i32 %fcvtmu_scalar, i32 0
+  ret <2 x i32> %fcvtmu_vector
+}
+
+define <2 x i32> @fcvtmu_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtmu_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu s0, d0
+; CHECK-NEXT:    ret
+  %fcvtmu_scalar = tail call i32 @llvm.aarch64.neon.fcvtmu.i32.f64(double %a)
+  %fcvtmu_vector = insertelement <2 x i32> poison, i32 %fcvtmu_scalar, i32 0
+  ret <2 x i32> %fcvtmu_vector
+}
+
+define <4 x i32> @fcvtmu_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtmu_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu s0, h0
+; CHECK-NEXT:    ret
+  %fcvtmu_scalar = tail call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a)
+  %fcvtmu_vector = insertelement <4 x i32> poison, i32 %fcvtmu_scalar, i32 0
+  ret <4 x i32> %fcvtmu_vector
+}
+
+define <4 x i32> @fcvtmu_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtmu_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu s0, s0
+; CHECK-NEXT:    ret
+  %fcvtmu_scalar = call i32 @llvm.aarch64.neon.fcvtmu.i32.f32(float %A)
+  %fcvtmu_vector = insertelement <4 x i32> poison, i32 %fcvtmu_scalar, i32 0
+  ret <4 x i32> %fcvtmu_vector
+}
+
+define <4 x i32> @fcvtmu_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtmu_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu s0, d0
+; CHECK-NEXT:    ret
+  %fcvtmu_scalar = tail call i32 @llvm.aarch64.neon.fcvtmu.i32.f64(double %a)
+  %fcvtmu_vector = insertelement <4 x i32> poison, i32 %fcvtmu_scalar, i32 0
+  ret <4 x i32> %fcvtmu_vector
+}
+
+define <1 x i64> @fcvtmu_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtmu_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu d0, h0
+; CHECK-NEXT:    ret
+  %fcvtmu_scalar = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half %a)
+  %fcvtmu_vector = insertelement <1 x i64> poison, i64 %fcvtmu_scalar, i32 0
+  ret <1 x i64> %fcvtmu_vector
+}
 
-define <1 x i64> @fcvtmu_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtmu_1d1s_scalar_to_vector_simd:
+define <1 x i64> @fcvtmu_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtmu_v1i64_from_f32_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtmu d0, s0
 ; CHECK-NEXT:    ret
-  %i = call i64 @llvm.aarch64.neon.fcvtmu.i64.f32(float %A)
-  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
-  ret <1 x i64> %vec
+  %fcvtmu_scalar = call i64 @llvm.aarch64.neon.fcvtmu.i64.f32(float %A)
+  %fcvtmu_vector = insertelement <1 x i64> poison, i64 %fcvtmu_scalar, i32 0
+  ret <1 x i64> %fcvtmu_vector
 }
 
+define <1 x i64> @fcvtmu_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtmu_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu d0, d0
+; CHECK-NEXT:    ret
+  %fcvtmu_scalar = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double %a)
+  %fcvtmu_vector = insertelement <1 x i64> poison, i64 %fcvtmu_scalar, i32 0
+  ret <1 x i64> %fcvtmu_vector
+}
 
-define  <1 x i64> @fcvtmu_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtmu_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtmu_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtmu_v2i64_from_f16_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtmu d0, h0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
-  ret <1 x i64> %vec
+  %fcvtmu_scalar = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half %a)
+  %fcvtmu_vector = insertelement <2 x i64> poison, i64 %fcvtmu_scalar, i32 0
+  ret <2 x i64> %fcvtmu_vector
+}
+
+define <2 x i64> @fcvtmu_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtmu_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu d0, s0
+; CHECK-NEXT:    ret
+  %fcvtmu_scalar = call i64 @llvm.aarch64.neon.fcvtmu.i64.f32(float %A)
+  %fcvtmu_vector = insertelement <2 x i64> poison, i64 %fcvtmu_scalar, i32 0
+  ret <2 x i64> %fcvtmu_vector
 }
 
-define  <1 x i64> @fcvtmu_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtmu_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtmu_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtmu_v2i64_from_f64_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtmu d0, d0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
-  ret <1 x i64> %vec
+  %fcvtmu_scalar = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double %a)
+  %fcvtmu_vector = insertelement <2 x i64> poison, i64 %fcvtmu_scalar, i32 0
+  ret <2 x i64> %fcvtmu_vector
+}
+
+define <2 x i32> @fcvtns_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtns_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtns s0, h0
+; CHECK-NEXT:    ret
+  %fcvtns_scalar = tail call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a)
+  %fcvtns_vector = insertelement <2 x i32> poison, i32 %fcvtns_scalar, i32 0
+  ret <2 x i32> %fcvtns_vector
+}
+
+define <2 x i32> @fcvtns_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtns_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtns s0, s0
+; CHECK-NEXT:    ret
+  %fcvtns_scalar = call i32 @llvm.aarch64.neon.fcvtns.i32.f32(float %A)
+  %fcvtns_vector = insertelement <2 x i32> poison, i32 %fcvtns_scalar, i32 0
+  ret <2 x i32> %fcvtns_vector
+}
+
+define <2 x i32> @fcvtns_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtns_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtns s0, d0
+; CHECK-NEXT:    ret
+  %fcvtns_scalar = tail call i32 @llvm.aarch64.neon.fcvtns.i32.f64(double %a)
+  %fcvtns_vector = insertelement <2 x i32> poison, i32 %fcvtns_scalar, i32 0
+  ret <2 x i32> %fcvtns_vector
+}
+
+define <4 x i32> @fcvtns_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtns_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtns s0, h0
+; CHECK-NEXT:    ret
+  %fcvtns_scalar = tail call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a)
+  %fcvtns_vector = insertelement <4 x i32> poison, i32 %fcvtns_scalar, i32 0
+  ret <4 x i32> %fcvtns_vector
+}
+
+define <4 x i32> @fcvtns_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtns_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtns s0, s0
+; CHECK-NEXT:    ret
+  %fcvtns_scalar = call i32 @llvm.aarch64.neon.fcvtns.i32.f32(float %A)
+  %fcvtns_vector = insertelement <4 x i32> poison, i32 %fcvtns_scalar, i32 0
+  ret <4 x i32> %fcvtns_vector
 }
 
+define <4 x i32> @fcvtns_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtns_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtns s0, d0
+; CHECK-NEXT:    ret
+  %fcvtns_scalar = tail call i32 @llvm.aarch64.neon.fcvtns.i32.f64(double %a)
+  %fcvtns_vector = insertelement <4 x i32> poison, i32 %fcvtns_scalar, i32 0
+  ret <4 x i32> %fcvtns_vector
+}
 
+define <1 x i64> @fcvtns_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtns_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtns d0, h0
+; CHECK-NEXT:    ret
+  %fcvtns_scalar = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f16(half %a)
+  %fcvtns_vector = insertelement <1 x i64> poison, i64 %fcvtns_scalar, i32 0
+  ret <1 x i64> %fcvtns_vector
+}
 
-define <1 x i64> @fcvtns_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtns_1d1s_scalar_to_vector_simd:
+define <1 x i64> @fcvtns_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtns_v1i64_from_f32_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtns d0, s0
 ; CHECK-NEXT:    ret
-  %i = call i64 @llvm.aarch64.neon.fcvtns.i64.f32(float %A)
-  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
-  ret <1 x i64> %vec
+  %fcvtns_scalar = call i64 @llvm.aarch64.neon.fcvtns.i64.f32(float %A)
+  %fcvtns_vector = insertelement <1 x i64> poison, i64 %fcvtns_scalar, i32 0
+  ret <1 x i64> %fcvtns_vector
 }
 
+define <1 x i64> @fcvtns_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtns_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtns d0, d0
+; CHECK-NEXT:    ret
+  %fcvtns_scalar = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double %a)
+  %fcvtns_vector = insertelement <1 x i64> poison, i64 %fcvtns_scalar, i32 0
+  ret <1 x i64> %fcvtns_vector
+}
 
-define  <1 x i64> @fcvtns_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtns_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtns_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtns_v2i64_from_f16_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtns d0, h0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f16(half %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
-  ret <1 x i64> %vec
+  %fcvtns_scalar = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f16(half %a)
+  %fcvtns_vector = insertelement <2 x i64> poison, i64 %fcvtns_scalar, i32 0
+  ret <2 x i64> %fcvtns_vector
+}
+
+define <2 x i64> @fcvtns_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtns_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtns d0, s0
+; CHECK-NEXT:    ret
+  %fcvtns_scalar = call i64 @llvm.aarch64.neon.fcvtns.i64.f32(float %A)
+  %fcvtns_vector = insertelement <2 x i64> poison, i64 %fcvtns_scalar, i32 0
+  ret <2 x i64> %fcvtns_vector
 }
 
-define  <1 x i64> @fcvtns_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtns_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtns_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtns_v2i64_from_f64_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtns d0, d0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
-  ret <1 x i64> %vec
+  %fcvtns_scalar = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double %a)
+  %fcvtns_vector = insertelement <2 x i64> poison, i64 %fcvtns_scalar, i32 0
+  ret <2 x i64> %fcvtns_vector
+}
+
+define <2 x i32> @fcvtnu_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtnu_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnu s0, h0
+; CHECK-NEXT:    ret
+  %fcvtnu_scalar = tail call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a)
+  %fcvtnu_vector = insertelement <2 x i32> poison, i32 %fcvtnu_scalar, i32 0
+  ret <2 x i32> %fcvtnu_vector
 }
 
+define <2 x i32> @fcvtnu_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtnu_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnu s0, s0
+; CHECK-NEXT:    ret
+  %fcvtnu_scalar = call i32 @llvm.aarch64.neon.fcvtnu.i32.f32(float %A)
+  %fcvtnu_vector = insertelement <2 x i32> poison, i32 %fcvtnu_scalar, i32 0
+  ret <2 x i32> %fcvtnu_vector
+}
+
+define <2 x i32> @fcvtnu_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtnu_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnu s0, d0
+; CHECK-NEXT:    ret
+  %fcvtnu_scalar = tail call i32 @llvm.aarch64.neon.fcvtnu.i32.f64(double %a)
+  %fcvtnu_vector = insertelement <2 x i32> poison, i32 %fcvtnu_scalar, i32 0
+  ret <2 x i32> %fcvtnu_vector
+}
 
+define <4 x i32> @fcvtnu_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtnu_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnu s0, h0
+; CHECK-NEXT:    ret
+  %fcvtnu_scalar = tail call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a)
+  %fcvtnu_vector = insertelement <4 x i32> poison, i32 %fcvtnu_scalar, i32 0
+  ret <4 x i32> %fcvtnu_vector
+}
+
+define <4 x i32> @fcvtnu_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtnu_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnu s0, s0
+; CHECK-NEXT:    ret
+  %fcvtnu_scalar = call i32 @llvm.aarch64.neon.fcvtnu.i32.f32(float %A)
+  %fcvtnu_vector = insertelement <4 x i32> poison, i32 %fcvtnu_scalar, i32 0
+  ret <4 x i32> %fcvtnu_vector
+}
+
+define <4 x i32> @fcvtnu_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtnu_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnu s0, d0
+; CHECK-NEXT:    ret
+  %fcvtnu_scalar = tail call i32 @llvm.aarch64.neon.fcvtnu.i32.f64(double %a)
+  %fcvtnu_vector = insertelement <4 x i32> poison, i32 %fcvtnu_scalar, i32 0
+  ret <4 x i32> %fcvtnu_vector
+}
 
-define <1 x i64> @fcvtnu_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtnu_1d1s_scalar_to_vector_simd:
+define <1 x i64> @fcvtnu_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtnu_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnu d0, h0
+; CHECK-NEXT:    ret
+  %fcvtnu_scalar = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half %a)
+  %fcvtnu_vector = insertelement <1 x i64> poison, i64 %fcvtnu_scalar, i32 0
+  ret <1 x i64> %fcvtnu_vector
+}
+
+define <1 x i64> @fcvtnu_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtnu_v1i64_from_f32_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtnu d0, s0
 ; CHECK-NEXT:    ret
-  %i = call i64 @llvm.aarch64.neon.fcvtnu.i64.f32(float %A)
-  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
-  ret <1 x i64> %vec
+  %fcvtnu_scalar = call i64 @llvm.aarch64.neon.fcvtnu.i64.f32(float %A)
+  %fcvtnu_vector = insertelement <1 x i64> poison, i64 %fcvtnu_scalar, i32 0
+  ret <1 x i64> %fcvtnu_vector
 }
 
+define <1 x i64> @fcvtnu_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtnu_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnu d0, d0
+; CHECK-NEXT:    ret
+  %fcvtnu_scalar = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double %a)
+  %fcvtnu_vector = insertelement <1 x i64> poison, i64 %fcvtnu_scalar, i32 0
+  ret <1 x i64> %fcvtnu_vector
+}
 
-define  <1 x i64> @fcvtnu_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtnu_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtnu_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtnu_v2i64_from_f16_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtnu d0, h0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
-  ret <1 x i64> %vec
+  %fcvtnu_scalar = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half %a)
+  %fcvtnu_vector = insertelement <2 x i64> poison, i64 %fcvtnu_scalar, i32 0
+  ret <2 x i64> %fcvtnu_vector
+}
+
+define <2 x i64> @fcvtnu_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtnu_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnu d0, s0
+; CHECK-NEXT:    ret
+  %fcvtnu_scalar = call i64 @llvm.aarch64.neon.fcvtnu.i64.f32(float %A)
+  %fcvtnu_vector = insertelement <2 x i64> poison, i64 %fcvtnu_scalar, i32 0
+  ret <2 x i64> %fcvtnu_vector
 }
 
-define  <1 x i64> @fcvtnu_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtnu_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtnu_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtnu_v2i64_from_f64_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtnu d0, d0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
-  ret <1 x i64> %vec
+  %fcvtnu_scalar = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double %a)
+  %fcvtnu_vector = insertelement <2 x i64> poison, i64 %fcvtnu_scalar, i32 0
+  ret <2 x i64> %fcvtnu_vector
 }
 
+define <2 x i32> @fcvtps_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtps_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps s0, h0
+; CHECK-NEXT:    ret
+  %fcvtps_scalar = tail call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a)
+  %fcvtps_vector = insertelement <2 x i32> poison, i32 %fcvtps_scalar, i32 0
+  ret <2 x i32> %fcvtps_vector
+}
 
+define <2 x i32> @fcvtps_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtps_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps s0, s0
+; CHECK-NEXT:    ret
+  %fcvtps_scalar = call i32 @llvm.aarch64.neon.fcvtps.i32.f32(float %A)
+  %fcvtps_vector = insertelement <2 x i32> poison, i32 %fcvtps_scalar, i32 0
+  ret <2 x i32> %fcvtps_vector
+}
 
-define <1 x i64> @fcvtps_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtps_1d1s_scalar_to_vector_simd:
+define <2 x i32> @fcvtps_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtps_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps s0, d0
+; CHECK-NEXT:    ret
+  %fcvtps_scalar = tail call i32 @llvm.aarch64.neon.fcvtps.i32.f64(double %a)
+  %fcvtps_vector = insertelement <2 x i32> poison, i32 %fcvtps_scalar, i32 0
+  ret <2 x i32> %fcvtps_vector
+}
+
+define <4 x i32> @fcvtps_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtps_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps s0, h0
+; CHECK-NEXT:    ret
+  %fcvtps_scalar = tail call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a)
+  %fcvtps_vector = insertelement <4 x i32> poison, i32 %fcvtps_scalar, i32 0
+  ret <4 x i32> %fcvtps_vector
+}
+
+define <4 x i32> @fcvtps_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtps_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps s0, s0
+; CHECK-NEXT:    ret
+  %fcvtps_scalar = call i32 @llvm.aarch64.neon.fcvtps.i32.f32(float %A)
+  %fcvtps_vector = insertelement <4 x i32> poison, i32 %fcvtps_scalar, i32 0
+  ret <4 x i32> %fcvtps_vector
+}
+
+define <4 x i32> @fcvtps_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtps_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps s0, d0
+; CHECK-NEXT:    ret
+  %fcvtps_scalar = tail call i32 @llvm.aarch64.neon.fcvtps.i32.f64(double %a)
+  %fcvtps_vector = insertelement <4 x i32> poison, i32 %fcvtps_scalar, i32 0
+  ret <4 x i32> %fcvtps_vector
+}
+
+define <1 x i64> @fcvtps_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtps_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps d0, h0
+; CHECK-NEXT:    ret
+  %fcvtps_scalar = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f16(half %a)
+  %fcvtps_vector = insertelement <1 x i64> poison, i64 %fcvtps_scalar, i32 0
+  ret <1 x i64> %fcvtps_vector
+}
+
+define <1 x i64> @fcvtps_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtps_v1i64_from_f32_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtps d0, s0
 ; CHECK-NEXT:    ret
-  %i = call i64 @llvm.aarch64.neon.fcvtps.i64.f32(float %A)
-  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
-  ret <1 x i64> %vec
+  %fcvtps_scalar = call i64 @llvm.aarch64.neon.fcvtps.i64.f32(float %A)
+  %fcvtps_vector = insertelement <1 x i64> poison, i64 %fcvtps_scalar, i32 0
+  ret <1 x i64> %fcvtps_vector
 }
 
+define <1 x i64> @fcvtps_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtps_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps d0, d0
+; CHECK-NEXT:    ret
+  %fcvtps_scalar = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double %a)
+  %fcvtps_vector = insertelement <1 x i64> poison, i64 %fcvtps_scalar, i32 0
+  ret <1 x i64> %fcvtps_vector
+}
 
-define  <1 x i64> @fcvtps_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtps_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtps_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtps_v2i64_from_f16_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtps d0, h0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f16(half %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
-  ret <1 x i64> %vec
+  %fcvtps_scalar = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f16(half %a)
+  %fcvtps_vector = insertelement <2 x i64> poison, i64 %fcvtps_scalar, i32 0
+  ret <2 x i64> %fcvtps_vector
 }
 
-define  <1 x i64> @fcvtps_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtps_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtps_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtps_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps d0, s0
+; CHECK-NEXT:    ret
+  %fcvtps_scalar = call i64 @llvm.aarch64.neon.fcvtps.i64.f32(float %A)
+  %fcvtps_vector = insertelement <2 x i64> poison, i64 %fcvtps_scalar, i32 0
+  ret <2 x i64> %fcvtps_vector
+}
+
+define <2 x i64> @fcvtps_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtps_v2i64_from_f64_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtps d0, d0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
-  ret <1 x i64> %vec
+  %fcvtps_scalar = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double %a)
+  %fcvtps_vector = insertelement <2 x i64> poison, i64 %fcvtps_scalar, i32 0
+  ret <2 x i64> %fcvtps_vector
+}
+
+define <2 x i32> @fcvtpu_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtpu_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu s0, h0
+; CHECK-NEXT:    ret
+  %fcvtpu_scalar = tail call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a)
+  %fcvtpu_vector = insertelement <2 x i32> poison, i32 %fcvtpu_scalar, i32 0
+  ret <2 x i32> %fcvtpu_vector
+}
+
+define <2 x i32> @fcvtpu_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtpu_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu s0, s0
+; CHECK-NEXT:    ret
+  %fcvtpu_scalar = call i32 @llvm.aarch64.neon.fcvtpu.i32.f32(float %A)
+  %fcvtpu_vector = insertelement <2 x i32> poison, i32 %fcvtpu_scalar, i32 0
+  ret <2 x i32> %fcvtpu_vector
+}
+
+define <2 x i32> @fcvtpu_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtpu_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu s0, d0
+; CHECK-NEXT:    ret
+  %fcvtpu_scalar = tail call i32 @llvm.aarch64.neon.fcvtpu.i32.f64(double %a)
+  %fcvtpu_vector = insertelement <2 x i32> poison, i32 %fcvtpu_scalar, i32 0
+  ret <2 x i32> %fcvtpu_vector
+}
+
+define <4 x i32> @fcvtpu_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtpu_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu s0, h0
+; CHECK-NEXT:    ret
+  %fcvtpu_scalar = tail call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a)
+  %fcvtpu_vector = insertelement <4 x i32> poison, i32 %fcvtpu_scalar, i32 0
+  ret <4 x i32> %fcvtpu_vector
+}
+
+define <4 x i32> @fcvtpu_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtpu_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu s0, s0
+; CHECK-NEXT:    ret
+  %fcvtpu_scalar = call i32 @llvm.aarch64.neon.fcvtpu.i32.f32(float %A)
+  %fcvtpu_vector = insertelement <4 x i32> poison, i32 %fcvtpu_scalar, i32 0
+  ret <4 x i32> %fcvtpu_vector
 }
 
+define <4 x i32> @fcvtpu_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtpu_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu s0, d0
+; CHECK-NEXT:    ret
+  %fcvtpu_scalar = tail call i32 @llvm.aarch64.neon.fcvtpu.i32.f64(double %a)
+  %fcvtpu_vector = insertelement <4 x i32> poison, i32 %fcvtpu_scalar, i32 0
+  ret <4 x i32> %fcvtpu_vector
+}
 
+define <1 x i64> @fcvtpu_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtpu_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu d0, h0
+; CHECK-NEXT:    ret
+  %fcvtpu_scalar = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half %a)
+  %fcvtpu_vector = insertelement <1 x i64> poison, i64 %fcvtpu_scalar, i32 0
+  ret <1 x i64> %fcvtpu_vector
+}
 
-define <1 x i64> @fcvtpu_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtpu_1d1s_scalar_to_vector_simd:
+define <1 x i64> @fcvtpu_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtpu_v1i64_from_f32_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtpu d0, s0
 ; CHECK-NEXT:    ret
-  %i = call i64 @llvm.aarch64.neon.fcvtpu.i64.f32(float %A)
-  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
-  ret <1 x i64> %vec
+  %fcvtpu_scalar = call i64 @llvm.aarch64.neon.fcvtpu.i64.f32(float %A)
+  %fcvtpu_vector = insertelement <1 x i64> poison, i64 %fcvtpu_scalar, i32 0
+  ret <1 x i64> %fcvtpu_vector
 }
 
+define <1 x i64> @fcvtpu_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtpu_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu d0, d0
+; CHECK-NEXT:    ret
+  %fcvtpu_scalar = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double %a)
+  %fcvtpu_vector = insertelement <1 x i64> poison, i64 %fcvtpu_scalar, i32 0
+  ret <1 x i64> %fcvtpu_vector
+}
 
-define  <1 x i64> @fcvtpu_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtpu_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtpu_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtpu_v2i64_from_f16_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtpu d0, h0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
-  ret <1 x i64> %vec
+  %fcvtpu_scalar = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half %a)
+  %fcvtpu_vector = insertelement <2 x i64> poison, i64 %fcvtpu_scalar, i32 0
+  ret <2 x i64> %fcvtpu_vector
+}
+
+define <2 x i64> @fcvtpu_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtpu_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu d0, s0
+; CHECK-NEXT:    ret
+  %fcvtpu_scalar = call i64 @llvm.aarch64.neon.fcvtpu.i64.f32(float %A)
+  %fcvtpu_vector = insertelement <2 x i64> poison, i64 %fcvtpu_scalar, i32 0
+  ret <2 x i64> %fcvtpu_vector
 }
 
-define  <1 x i64> @fcvtpu_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtpu_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtpu_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtpu_v2i64_from_f64_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtpu d0, d0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
-  ret <1 x i64> %vec
+  %fcvtpu_scalar = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double %a)
+  %fcvtpu_vector = insertelement <2 x i64> poison, i64 %fcvtpu_scalar, i32 0
+  ret <2 x i64> %fcvtpu_vector
 }
 
+define <2 x i32> @fcvtzs_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzs_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, h0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
+  %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
+  ret <2 x i32> %fcvtzs_vector
+}
 
+define <2 x i32> @fcvtzs_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzs_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %A)
+  %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
+  ret <2 x i32> %fcvtzs_vector
+}
+
+define <2 x i32> @fcvtzs_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzs_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, d0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f64(double %a)
+  %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
+  ret <2 x i32> %fcvtzs_vector
+}
+
+define <4 x i32> @fcvtzs_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzs_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, h0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
+  %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
+  ret <4 x i32> %fcvtzs_vector
+}
+
+define <4 x i32> @fcvtzs_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzs_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %A)
+  %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
+  ret <4 x i32> %fcvtzs_vector
+}
+
+define <4 x i32> @fcvtzs_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzs_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, d0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f64(double %a)
+  %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
+  ret <4 x i32> %fcvtzs_vector
+}
 
-define <1 x i64> @fcvtzs_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtzs_1d1s_scalar_to_vector_simd:
+define <1 x i64> @fcvtzs_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzs_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a)
+  %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
+  ret <1 x i64> %fcvtzs_vector
+}
+
+define <1 x i64> @fcvtzs_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzs_v1i64_from_f32_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtzs d0, s0
 ; CHECK-NEXT:    ret
-  %i = call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %A)
-  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
-  ret <1 x i64> %vec
+  %fcvtzs_scalar = call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %A)
+  %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
+  ret <1 x i64> %fcvtzs_vector
 }
 
+define <1 x i64> @fcvtzs_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzs_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double %a)
+  %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
+  ret <1 x i64> %fcvtzs_vector
+}
 
-define  <1 x i64> @fcvtzs_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtzs_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtzs_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzs_v2i64_from_f16_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtzs d0, h0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
-  ret <1 x i64> %vec
+  %fcvtzs_scalar = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a)
+  %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
+  ret <2 x i64> %fcvtzs_vector
+}
+
+define <2 x i64> @fcvtzs_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzs_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %fcvtzs_scalar = call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %A)
+  %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
+  ret <2 x i64> %fcvtzs_vector
 }
 
-define  <1 x i64> @fcvtzs_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtzs_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtzs_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzs_v2i64_from_f64_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtzs d0, d0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
-  ret <1 x i64> %vec
+  %fcvtzs_scalar = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double %a)
+  %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
+  ret <2 x i64> %fcvtzs_vector
+}
+
+define <2 x i32> @fcvtzu_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzu_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, h0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = tail call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
+  %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
+  ret <2 x i32> %fcvtzu_vector
+}
+
+define <2 x i32> @fcvtzu_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzu_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, s0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = call i32 @llvm.aarch64.neon.fcvtzu.i32.f32(float %A)
+  %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
+  ret <2 x i32> %fcvtzu_vector
+}
+
+define <2 x i32> @fcvtzu_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzu_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, d0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = tail call i32 @llvm.aarch64.neon.fcvtzu.i32.f64(double %a)
+  %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
+  ret <2 x i32> %fcvtzu_vector
+}
+
+define <4 x i32> @fcvtzu_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzu_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, h0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = tail call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
+  %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
+  ret <4 x i32> %fcvtzu_vector
+}
+
+define <4 x i32> @fcvtzu_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzu_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, s0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = call i32 @llvm.aarch64.neon.fcvtzu.i32.f32(float %A)
+  %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
+  ret <4 x i32> %fcvtzu_vector
 }
 
+define <4 x i32> @fcvtzu_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzu_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, d0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = tail call i32 @llvm.aarch64.neon.fcvtzu.i32.f64(double %a)
+  %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
+  ret <4 x i32> %fcvtzu_vector
+}
 
+define <1 x i64> @fcvtzu_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzu_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, h0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half %a)
+  %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
+  ret <1 x i64> %fcvtzu_vector
+}
 
-define <1 x i64> @fcvtzu_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtzu_1d1s_scalar_to_vector_simd:
+define <1 x i64> @fcvtzu_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzu_v1i64_from_f32_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtzu d0, s0
 ; CHECK-NEXT:    ret
-  %i = call i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float %A)
-  %vec = insertelement <1 x i64> poison, i64 %i, i32 0
-  ret <1 x i64> %vec
+  %fcvtzu_scalar = call i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float %A)
+  %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
+  ret <1 x i64> %fcvtzu_vector
 }
 
+define <1 x i64> @fcvtzu_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzu_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, d0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double %a)
+  %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
+  ret <1 x i64> %fcvtzu_vector
+}
 
-define  <1 x i64> @fcvtzu_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtzu_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtzu_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzu_v2i64_from_f16_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtzu d0, h0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
-  ret <1 x i64> %vec
+  %fcvtzu_scalar = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half %a)
+  %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
+  ret <2 x i64> %fcvtzu_vector
 }
 
-define  <1 x i64> @fcvtzu_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtzu_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtzu_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzu_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, s0
+; CHECK-NEXT:    ret
+  %fcvtzu_scalar = call i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float %A)
+  %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
+  ret <2 x i64> %fcvtzu_vector
+}
+
+define <2 x i64> @fcvtzu_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzu_v2i64_from_f64_scalar_to_vector_simd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtzu d0, d0
 ; CHECK-NEXT:    ret
-  %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double %a)
-  %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
-  ret <1 x i64> %vec
+  %fcvtzu_scalar = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double %a)
+  %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
+  ret <2 x i64> %fcvtzu_vector
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
index dcb3b9b24627b..c70dac3f21a53 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -357,7 +357,6 @@ define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind {
 	ret <2 x i64> %tmp3
 }
 
-; FIXME: Generate "fcvtzs d0, d0"?
 define <1 x i64> @fcvtzs_1d(<1 x double> %A) nounwind {
 ; CHECK-LABEL: fcvtzs_1d:
 ; CHECK:       // %bb.0:

>From dfe66a991fef1707961cd6dce4e7093422401019 Mon Sep 17 00:00:00 2001
From: Lukacma <Marian.Lukac at arm.com>
Date: Thu, 15 Jan 2026 15:03:51 +0000
Subject: [PATCH 4/8] Apply suggestion from @kmclaughlin-arm

Co-authored-by: Kerry McLaughlin <kerry.mclaughlin at arm.com>
---
 llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
index 55af566b9f4c1..e0fe663bc625d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
@@ -609,7 +609,7 @@ define  float @fcvtzu_1s1s_simd(float %a) {
 }
 
 ;
-; Intriniscs (scalar_to_vector)
+; Intrinsics (scalar_to_vector)
 ;
 
 define <2 x i32> @fcvtas_v2i32_from_f16_scalar_to_vector_simd(half %a) {

>From 5f5eb5c22b07714b5d3c5ed6d0e0426275d79b2d Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 21 Jan 2026 15:33:43 +0000
Subject: [PATCH 5/8] Update to use custom lowering of Scalar to Vector instead
 of patterns

---
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    |   63 +
 .../lib/Target/AArch64/AArch64InstrAtomics.td |    2 +-
 .../lib/Target/AArch64/AArch64InstrFormats.td |   19 +-
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  124 +-
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |    2 +-
 .../AArch64/aarch64-matrix-umull-smull.ll     |    8 +-
 llvm/test/CodeGen/AArch64/aarch64-pmull2.ll   |    4 +-
 .../CodeGen/AArch64/arm64-cvt-simd-fptoi.ll   |  380 ------
 .../CodeGen/AArch64/arm64-neon-select_cc.ll   |   24 +-
 llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll |   56 +-
 llvm/test/CodeGen/AArch64/arm64-vshift.ll     |    9 +-
 llvm/test/CodeGen/AArch64/avoid-pre-trunc.ll  |   24 +-
 .../AArch64/concat-vector-add-combine.ll      |   20 +-
 llvm/test/CodeGen/AArch64/ctpop.ll            |   48 +-
 .../test/CodeGen/AArch64/fptosi-sat-vector.ll |   17 +-
 .../test/CodeGen/AArch64/fptoui-sat-vector.ll |   17 +-
 llvm/test/CodeGen/AArch64/fsh.ll              |   16 +-
 .../AArch64/ragreedy-local-interval-cost.ll   |  126 +-
 .../AArch64/scalar-to-vector-bitcasts.ll      |   45 +
 llvm/test/CodeGen/AArch64/sext.ll             |   42 +-
 .../AArch64/sve-fixed-vector-llrint.ll        |  365 +++---
 .../CodeGen/AArch64/sve-fixed-vector-lrint.ll | 1039 ++++++++---------
 llvm/test/CodeGen/AArch64/vector-llrint.ll    |   17 +-
 llvm/test/CodeGen/AArch64/vector-lrint.ll     |  507 ++++----
 llvm/test/CodeGen/AArch64/zext.ll             |   38 +-
 25 files changed, 1301 insertions(+), 1711 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/scalar-to-vector-bitcasts.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 54ad7beb823ac..b1377aeaaa69c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -61,6 +61,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
   }
 
   void Select(SDNode *Node) override;
+  void PreprocessISelDAG() override;
 
   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
   /// inline asm expressions.
@@ -532,6 +533,28 @@ char AArch64DAGToDAGISelLegacy::ID = 0;
 
 INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
 
+/// addBitcastHints - Bitcast the integer operands and result of a node to FP
+/// so the instruction selector can tell which values live in NEON registers.
+static SDValue addBitcastHints(SelectionDAG &DAG, SDNode &N) {
+  SDLoc DL(&N);
+  auto getFloatVT = [](EVT VT) {
+    EVT ScalarVT = VT.getScalarType();
+    assert((ScalarVT == MVT::i32 || ScalarVT == MVT::i64) && "Unexpected VT");
+    return VT.changeElementType(ScalarVT == MVT::i32 ? MVT::f32 : MVT::f64);
+  };
+  auto bitcastToFloat = [&](SDValue Val) {
+    return DAG.getBitcast(getFloatVT(Val.getValueType()), Val);
+  };
+  SmallVector<SDValue, 2> NewOps;
+  NewOps.reserve(N.getNumOperands() - 1);
+
+  for (unsigned I = 0, E = N.getNumOperands(); I < E; ++I)
+    NewOps.push_back(bitcastToFloat(N.getOperand(I)));
+  EVT OrigVT = N.getValueType(0);
+  SDValue OpNode = DAG.getNode(N.getOpcode(), DL, getFloatVT(OrigVT), NewOps);
+  return DAG.getBitcast(OrigVT, OpNode);
+}
+
 /// isIntImmediate - This method tests to see if the node is a constant
 /// operand. If so Imm will receive the 32-bit value.
 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
@@ -7774,3 +7797,43 @@ bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
 
   return false;
 }
+
+void AArch64DAGToDAGISel::PreprocessISelDAG() {
+  bool MadeChange = false;
+  for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
+    if (N.use_empty())
+      continue;
+
+    SDValue Result;
+    switch (N.getOpcode()) {
+    case ISD::SCALAR_TO_VECTOR: {
+      EVT VT = N.getValueType(0);
+      if (!VT.isVector() || VT.isScalableVector() || !VT.isInteger())
+        break;
+      if (VT.getVectorElementType() != N.getOperand(0).getValueType())
+        break;
+
+      Result = addBitcastHints(*CurDAG, N);
+      break;
+    }
+    default:
+      break;
+    }
+
+    if (Result) {
+      LLVM_DEBUG(dbgs() << "AArch64 DAG preprocessing replacing:\nOld:    ");
+      LLVM_DEBUG(N.dump(CurDAG));
+      LLVM_DEBUG(dbgs() << "\nNew: ");
+      LLVM_DEBUG(Result.dump(CurDAG));
+      LLVM_DEBUG(dbgs() << "\n");
+
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(&N, 0), Result);
+      MadeChange = true;
+    }
+  }
+
+  if (MadeChange)
+    CurDAG->RemoveDeadNodes();
+
+  SelectionDAGISel::PreprocessISelDAG();
+}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 5d9215dd71233..d9bd43ce70522 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -577,7 +577,7 @@ let Predicates = [HasRCPC3, HasNEON] in {
   def : Pat<(vector_insert (v2f64 VecListOne128:$Rd),
                 (f64 (bitconvert (i64 (acquiring_load<atomic_load_nonext_64> GPR64sp:$Rn)))), (i64 VectorIndexD:$idx)),
             (LDAP1 VecListOne128:$Rd, VectorIndexD:$idx, GPR64sp:$Rn)>;
-  def : Pat<(v1i64 (scalar_to_vector
+  def : Pat<(v1i64 (scalar_to_vector_any_64
                 (i64 (acquiring_load<atomic_load_nonext_64> GPR64sp:$Rn)))),
             (EXTRACT_SUBREG (LDAP1 (v2i64 (IMPLICIT_DEF)), (i64 0), GPR64sp:$Rn), dsub)>;
   def : Pat<(v1f64 (scalar_to_vector
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 43319f7eb8a8f..3b41f373835dd 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -200,10 +200,27 @@ def dup_v4f32 :
              [(v2f32 (extract_subvector (v4f32 (AArch64duplane32 (v4f32 node:$LHS), node:$RHS)), (i64 0))),
               (v2f32 (AArch64duplane32 (v4f32 node:$LHS), node:$RHS))]>;
 
+// Match scalar_to_vector values, optionally wrapped in a bitcast through
+// f32/f64.
+def scalar_to_vector_any_64 : PatFrags<(ops node:$src),
+                          [(scalar_to_vector node:$src),
+                           (bitconvert (v2f32 (scalar_to_vector (f32 (bitconvert node:$src))))),
+                           (bitconvert (v1f64 (scalar_to_vector (f64 (bitconvert node:$src)))))]>;
+def scalar_to_vector_any_128 : PatFrags<(ops node:$src),
+                          [(scalar_to_vector node:$src),
+                           (bitconvert (v4f32 (scalar_to_vector (f32 (bitconvert node:$src))))),
+                           (bitconvert (v2f64 (scalar_to_vector (f64 (bitconvert node:$src)))))]>;
+
 // Match either a scalar_to_vector (from SDAG) or a vector_insert of undef (from GISel)
 def vec_ins_or_scal_vec : PatFrags<(ops node:$src),
                           [(vector_insert undef, node:$src, (i64 0)),
                            (scalar_to_vector node:$src)]>;
+def vec_ins_or_scal_vec_64 : PatFrags<(ops node:$src),
+                             [(vector_insert undef, node:$src, (i64 0)),
+                              (scalar_to_vector_any_64 node:$src)]>;
+def vec_ins_or_scal_vec_128 : PatFrags<(ops node:$src),
+                              [(vector_insert undef, node:$src, (i64 0)),
+                               (scalar_to_vector_any_128 node:$src)]>;
 
 //===----------------------------------------------------------------------===//
 // Asm Operand Classes.
@@ -8693,7 +8710,7 @@ multiclass SIMDScalarDUP<string asm> {
     let Inst{19-16} = 0b1000;
   }
 
-  def : Pat<(v1i64 (scalar_to_vector (i64 (vector_extract (v2i64 V128:$src),
+  def : Pat<(v1i64 (scalar_to_vector_any_64 (i64 (vector_extract (v2i64 V128:$src),
                                                           VectorIndexD:$idx)))),
             (!cast<Instruction>(NAME # i64) V128:$src, VectorIndexD:$idx)>;
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 6a0fe9b4619c6..8f1aa8dd3aba2 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4304,7 +4304,8 @@ multiclass LoadInsertVTPatterns<SDPatternOperator LoadOp, ValueType VT, ValueTyp
                                 Instruction LoadInst, Instruction UnscaledLoadInst,
                                 Instruction ROWLoadInst, Instruction ROXLoadInst,
                                 ROAddrMode ro, ComplexPattern Addr, ComplexPattern UnscaledAddr,
-                                Operand AddrImm, SubRegIndex SubReg> {
+                                Operand AddrImm, SubRegIndex SubReg,
+                                SDPatternOperator VecInsFrag> {
   // Scaled
   def : Pat <(vector_insert (VT immAllZerosV),
                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
@@ -4323,16 +4324,16 @@ multiclass LoadInsertVTPatterns<SDPatternOperator LoadOp, ValueType VT, ValueTyp
              (SUBREG_TO_REG (i64 0), (ROXLoadInst GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), SubReg)>;
 
   // Undef equivalents of the patterns above.
-  def : Pat <(VT (vec_ins_or_scal_vec
+  def : Pat <(VT (VecInsFrag
                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))))),
             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
-  def : Pat <(VT (vec_ins_or_scal_vec
+  def : Pat <(VT (VecInsFrag
                  (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))))),
              (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
-  def : Pat <(VT (vec_ins_or_scal_vec
+  def : Pat <(VT (VecInsFrag
                  (ScalarVT (LoadOp (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))))),
              (SUBREG_TO_REG (i64 0), (ROWLoadInst GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), SubReg)>;
-  def : Pat <(VT (vec_ins_or_scal_vec
+  def : Pat <(VT (VecInsFrag
                  (ScalarVT (LoadOp (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))))),
              (SUBREG_TO_REG (i64 0), (ROXLoadInst GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), SubReg)>;
 }
@@ -4343,11 +4344,11 @@ multiclass LoadInsertPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType
                               ROAddrMode ro, ComplexPattern Addr, ComplexPattern UnscaledAddr,
                               Operand AddrImm, SubRegIndex SubReg> {
   defm : LoadInsertVTPatterns<LoadOp, VT, ScalarVT, LoadInst, UnscaledLoadInst, ROWLoadInst,
-                              ROXLoadInst, ro, Addr, UnscaledAddr, AddrImm, SubReg>;
+                              ROXLoadInst, ro, Addr, UnscaledAddr, AddrImm, SubReg, vec_ins_or_scal_vec_128>;
   defm : LoadInsertVTPatterns<LoadOp, HVT, ScalarVT, LoadInst, UnscaledLoadInst, ROWLoadInst,
-                              ROXLoadInst, ro, Addr, UnscaledAddr, AddrImm, SubReg>;
+                              ROXLoadInst, ro, Addr, UnscaledAddr, AddrImm, SubReg, vec_ins_or_scal_vec_64>;
   defm : LoadInsertVTPatterns<LoadOp, SVT, ScalarVT, LoadInst, UnscaledLoadInst, ROWLoadInst,
-                              ROXLoadInst, ro, Addr, UnscaledAddr, AddrImm, SubReg>;
+                              ROXLoadInst, ro, Addr, UnscaledAddr, AddrImm, SubReg, vec_ins_or_scal_vec>;
 }
 
 // Accept i8 scalar argument in GlobalISel.
@@ -4381,16 +4382,16 @@ defm : LoadInsertPatterns<load,       v2f64,  isVoid, nxv2f64,  f64,
 
 // Extra patterns for v1f64 scalar_to_vector(load), which need to avoid the
 // SUBREG_TO_REG used above.
-def : Pat <(v1i64 (scalar_to_vector (i64
+def : Pat <(v1i64 (scalar_to_vector_any_64 (i64
                (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
-def : Pat <(v1i64 (scalar_to_vector (i64
+def : Pat <(v1i64 (scalar_to_vector_any_64 (i64
                (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
-def : Pat <(v1i64 (scalar_to_vector (i64
+def : Pat <(v1i64 (scalar_to_vector_any_64 (i64
                (load (ro64.Wpat GPR64sp:$Rn, GPR32:$Rm, ro64.Wext:$extend))))),
            (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro64.Wext:$extend)>;
-def : Pat <(v1i64 (scalar_to_vector (i64
+def : Pat <(v1i64 (scalar_to_vector_any_64 (i64
                (load (ro64.Xpat GPR64sp:$Rn, GPR64:$Rm, ro64.Xext:$extend))))),
            (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro64.Xext:$extend)>;
 
@@ -4398,7 +4399,7 @@ def : Pat <(v1i64 (scalar_to_vector (i64
 // Enables direct SIMD register loads for small integer types (i8/i16) that are
 // naturally zero-extended to i32/i64.
 multiclass ExtLoad8_16AllModes<ValueType OutTy, ValueType InnerTy,
-                                SDPatternOperator OuterOp,
+                                PatFrags OuterOp,
                                 PatFrags LoadOp8, PatFrags LoadOp16> {
   // 8-bit loads.
   def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
@@ -4423,7 +4424,7 @@ multiclass ExtLoad8_16AllModes<ValueType OutTy, ValueType InnerTy,
 
 // Extended multiclass that includes 32-bit loads in addition to 8-bit and 16-bit.
 multiclass ExtLoad8_16_32AllModes<ValueType OutTy, ValueType InnerTy,
-                                   SDPatternOperator OuterOp,
+                                   PatFrags OuterOp,
                                    PatFrags LoadOp8, PatFrags LoadOp16, PatFrags LoadOp32> {
   defm : ExtLoad8_16AllModes<OutTy, InnerTy, OuterOp, LoadOp8, LoadOp16>;
 
@@ -4439,18 +4440,22 @@ multiclass ExtLoad8_16_32AllModes<ValueType OutTy, ValueType InnerTy,
 }
 
 // Instantiate bitconvert patterns for floating-point types.
-defm : ExtLoad8_16AllModes<f32, i32, bitconvert, zextloadi8, zextloadi16>;
-defm : ExtLoad8_16_32AllModes<f64, i64, bitconvert, zextloadi8, zextloadi16, zextloadi32>;
+
+// Create a fragment to reuse same multiclass.
+def bitconvert_frag : PatFrags<(ops node:$src),
+                               [(bitconvert node:$src)]>;
+defm : ExtLoad8_16AllModes<f32, i32, bitconvert_frag, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16_32AllModes<f64, i64, bitconvert_frag, zextloadi8, zextloadi16, zextloadi32>;
 
 // Instantiate scalar_to_vector patterns for all vector types.
-defm : ExtLoad8_16AllModes<v16i8, i32, scalar_to_vector, zextloadi8, zextloadi16>;
-defm : ExtLoad8_16AllModes<v16i8, i32, scalar_to_vector, extloadi8, extloadi16>;
-defm : ExtLoad8_16AllModes<v8i16, i32, scalar_to_vector, zextloadi8, zextloadi16>;
-defm : ExtLoad8_16AllModes<v8i16, i32, scalar_to_vector, extloadi8, extloadi16>;
-defm : ExtLoad8_16AllModes<v4i32, i32, scalar_to_vector, zextloadi8, zextloadi16>;
-defm : ExtLoad8_16AllModes<v4i32, i32, scalar_to_vector, extloadi8, extloadi16>;
-defm : ExtLoad8_16_32AllModes<v2i64, i64, scalar_to_vector, zextloadi8, zextloadi16, zextloadi32>;
-defm : ExtLoad8_16_32AllModes<v2i64, i64, scalar_to_vector, extloadi8, extloadi16, extloadi32>;
+defm : ExtLoad8_16AllModes<v16i8, i32, scalar_to_vector_any_128, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16AllModes<v16i8, i32, scalar_to_vector_any_128, extloadi8, extloadi16>;
+defm : ExtLoad8_16AllModes<v8i16, i32, scalar_to_vector_any_128, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16AllModes<v8i16, i32, scalar_to_vector_any_128, extloadi8, extloadi16>;
+defm : ExtLoad8_16AllModes<v4i32, i32, scalar_to_vector_any_128, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16AllModes<v4i32, i32, scalar_to_vector_any_128, extloadi8, extloadi16>;
+defm : ExtLoad8_16_32AllModes<v2i64, i64, scalar_to_vector_any_128, zextloadi8, zextloadi16, zextloadi32>;
+defm : ExtLoad8_16_32AllModes<v2i64, i64, scalar_to_vector_any_128, extloadi8, extloadi16, extloadi32>;
 
 // Pre-fetch.
 defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
@@ -6569,38 +6574,6 @@ multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
   def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))),
             (!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
 
-  foreach ret_type = [v2i32, v4i32] in {
-    let Predicates = [HasFPRCVT] in {
-    def : Pat<(ret_type (scalar_to_vector (i32 (OpN (f64 FPR64:$Rn))))),
-              (INSERT_SUBREG (IMPLICIT_DEF),
-              (!cast<Instruction>(INST # SDr) FPR64:$Rn), ssub)>;
-    def : Pat<(ret_type (scalar_to_vector (i32 (OpN (f16 FPR16:$Rn))))),
-              (INSERT_SUBREG (IMPLICIT_DEF),
-              (!cast<Instruction>(INST # SHr) FPR16:$Rn), ssub)>;
-    }
-    def : Pat<(ret_type (scalar_to_vector (i32 (OpN (f32 FPR32:$Rn))))),
-              (INSERT_SUBREG (IMPLICIT_DEF),
-              (!cast<Instruction>(INST # v1i32) FPR32:$Rn), ssub)>;
-  }
-
-  let Predicates = [HasFPRCVT] in {
-  def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f16 FPR16:$Rn))))),
-            (!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
-  def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f32 FPR32:$Rn))))),
-            (!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
-  def : Pat<(v2i64 (scalar_to_vector (i64 (OpN (f16 FPR16:$Rn))))),
-            (INSERT_SUBREG (IMPLICIT_DEF),
-              (!cast<Instruction>(INST # DHr) FPR16:$Rn), dsub)>;
-  def : Pat<(v2i64 (scalar_to_vector (i64 (OpN (f32 FPR32:$Rn))))),
-            (INSERT_SUBREG (IMPLICIT_DEF),
-              (!cast<Instruction>(INST # DSr) FPR32:$Rn), dsub)>;
-  }
-
-  def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f64 FPR64:$Rn))))),
-            (!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
-  def : Pat<(v2i64 (scalar_to_vector (i64 (OpN (f64 FPR64:$Rn))))),
-            (INSERT_SUBREG (IMPLICIT_DEF),
-              (!cast<Instruction>(INST # v1i64) FPR64:$Rn), dsub)>;
 }
 defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtas, "FCVTAS">;
 defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtau, "FCVTAU">;
@@ -6649,39 +6622,6 @@ multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
   def : Pat<(f64 (bitconvert (i64 (round f64:$Rn)))), 
             (!cast<Instruction>(INST # v1i64) $Rn)>;
 
-  foreach ret_type = [v2i32, v4i32] in {
-    let Predicates = [HasFPRCVT] in {
-    def : Pat<(ret_type (scalar_to_vector (i32 (round f16:$Rn)))), 
-              (INSERT_SUBREG (IMPLICIT_DEF),
-              (!cast<Instruction>(INST # SHr) $Rn), ssub)>;
-    def : Pat<(ret_type (scalar_to_vector (i32 (round f64:$Rn)))), 
-              (INSERT_SUBREG (IMPLICIT_DEF),
-              (!cast<Instruction>(INST # SDr) $Rn), ssub)>;
-    }
-    def : Pat<(ret_type (scalar_to_vector (i32 (round f32:$Rn)))), 
-              (INSERT_SUBREG (IMPLICIT_DEF),
-              (!cast<Instruction>(INST # v1i32) $Rn), ssub)>;
-  }
-
-  let Predicates = [HasFPRCVT] in {
-  def : Pat<(v1i64 (scalar_to_vector (i64 (round f16:$Rn)))), 
-            (!cast<Instruction>(INST # DHr) $Rn)>;
-  def : Pat<(v1i64 (scalar_to_vector (i64 (round f32:$Rn)))), 
-            (!cast<Instruction>(INST # DSr) $Rn)>;
-  def : Pat<(v2i64 (scalar_to_vector (i64 (round f16:$Rn)))), 
-            (INSERT_SUBREG (IMPLICIT_DEF),
-            (!cast<Instruction>(INST # DHr) $Rn), dsub)>;
-  def : Pat<(v2i64 (scalar_to_vector (i64 (round f32:$Rn)))), 
-            (INSERT_SUBREG (IMPLICIT_DEF),
-            (!cast<Instruction>(INST # DSr) $Rn), dsub)>;
-  }
-
-  def : Pat<(v1i64 (scalar_to_vector (i64 (round f64:$Rn)))), 
-            (!cast<Instruction>(INST # v1i64) $Rn)>;
-  def : Pat<(v2i64 (scalar_to_vector (i64 (round f64:$Rn)))), 
-            (INSERT_SUBREG (IMPLICIT_DEF),
-            (!cast<Instruction>(INST # v1i64) $Rn), dsub)>;
-
   let Predicates = [HasFullFP16] in {
   def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
             (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
@@ -8105,7 +8045,7 @@ multiclass Neon_INS_elt_ext_pattern<ValueType VT128, ValueType VT64, ValueType O
               (INS (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$src, dsub), (VecIndexMult imm:$Immd),
                    (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$Rn, dsub), imm:$Immn),
               dsub)>;
-  def : Pat<(OutVT (scalar_to_vector (i32 (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))))),
+  def : Pat<(OutVT (scalar_to_vector_any_64 (i32 (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))))),
             (EXTRACT_SUBREG
               (VT128 (SUBREG_TO_REG
                 (i64 0),
@@ -8121,7 +8061,7 @@ multiclass Neon_INS_elt_ext_pattern<ValueType VT128, ValueType VT64, ValueType O
               (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), (VecIndexMult imm:$Immd),
                    V128:$Rn, imm:$Immn),
               dsub)>;
-  def : Pat<(OutVT (scalar_to_vector (i32 (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))))),
+  def : Pat<(OutVT (scalar_to_vector_any_64 (i32 (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))))),
             (EXTRACT_SUBREG
               (VT128 (SUBREG_TO_REG
                 (i64 0),
@@ -9667,7 +9607,7 @@ def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexH
 let Predicates = [HasNEON] in {
   class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
                           SDPatternOperator ExtLoad, Instruction LD1>
-    : Pat<(ResultTy (vec_ins_or_scal_vec (i32 (ExtLoad GPR64sp:$Rn)))),
+    : Pat<(ResultTy (vec_ins_or_scal_vec_64 (i32 (ExtLoad GPR64sp:$Rn)))),
             (ResultTy (EXTRACT_SUBREG
               (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>;
 
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index ffb24dfbcd527..c7460b082301f 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3607,7 +3607,7 @@ let Predicates = [HasSVE_or_SME] in {
   def : Pat<(v1f64 (scalar_to_vector
                      (f64 (vector_extract nxv2f64:$vec, VectorIndexD:$index)))),
             (DUPi64 (EXTRACT_SUBREG nxv2f64:$vec, zsub), VectorIndexD:$index)>;
-  def : Pat<(v1i64 (scalar_to_vector
+  def : Pat<(v1i64 (scalar_to_vector_any_64
                      (i64 (vector_extract nxv2i64:$vec, VectorIndexD:$index)))),
             (DUPi64 (EXTRACT_SUBREG nxv2i64:$vec, zsub), VectorIndexD:$index)>;
   } // End HasNEON
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index 99c540366fb12..d68186aee8a9e 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -1520,10 +1520,10 @@ for.end12:                                        ; preds = %vector.body
 define void @matrix_mul_signed_and(i32 %N, ptr nocapture %C, ptr nocapture readonly %A, i32 %val) {
 ; CHECK-SD-LABEL: matrix_mul_signed_and:
 ; CHECK-SD:       // %bb.0: // %vector.header
-; CHECK-SD-NEXT:    and w9, w3, #0xffff
+; CHECK-SD-NEXT:    and w8, w3, #0xffff
 ; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    and x8, x0, #0xfffffff8
-; CHECK-SD-NEXT:    fmov s0, w9
 ; CHECK-SD-NEXT:  .LBB13_1: // %vector.body
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-SD-NEXT:    add x9, x2, w0, uxtw #1
@@ -1608,10 +1608,10 @@ for.end12:                                        ; preds = %vector.body
 define void @matrix_mul_signed_and_double(i32 %N, ptr nocapture %C, ptr nocapture readonly %A, i32 %val) {
 ; CHECK-SD-LABEL: matrix_mul_signed_and_double:
 ; CHECK-SD:       // %bb.0: // %vector.header
-; CHECK-SD-NEXT:    and w9, w3, #0xffff
+; CHECK-SD-NEXT:    and w8, w3, #0xffff
 ; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    and x8, x0, #0xfffffff0
-; CHECK-SD-NEXT:    fmov s0, w9
 ; CHECK-SD-NEXT:  .LBB14_1: // %vector.body
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-SD-NEXT:    add x9, x2, w0, uxtw #1
diff --git a/llvm/test/CodeGen/AArch64/aarch64-pmull2.ll b/llvm/test/CodeGen/AArch64/aarch64-pmull2.ll
index 9d7aa78ec139f..b5cee616ee9dc 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-pmull2.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-pmull2.ll
@@ -9,9 +9,9 @@ define void @test1(ptr %0, ptr %1) {
 ; CHECK-LABEL: test1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #56824 // =0xddf8
-; CHECK-NEXT:    mov w9, #61186 // =0xef02
+; CHECK-NEXT:    mov x9, #61186 // =0xef02
 ; CHECK-NEXT:    movk w8, #40522, lsl #16
-; CHECK-NEXT:    movk w9, #29710, lsl #16
+; CHECK-NEXT:    movk x9, #29710, lsl #16
 ; CHECK-NEXT:    ldp q0, q1, [x1]
 ; CHECK-NEXT:    dup v2.2d, x8
 ; CHECK-NEXT:    fmov d3, x9
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
index 52c35ce872b61..1c93b7e67e8a2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
@@ -1734,383 +1734,3 @@ define double @fcvtzu_dd_simd(double %a) {
   %bc = bitcast i64 %i to double
   ret double %bc
 }
-
-;
-; FPTOI scalar_to_vector
-;
-
-define <2 x i32> @fcvtzs_v2i32_from_f16_scalar_to_vector_simd(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i32_from_f16_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, h0
-; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzs_v2i32_from_f16_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs s0, h0
-; CHECK-NEXT:    ret
-  %fcvtzs_scalar = fptosi half %a to i32
-  %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
-  ret <2 x i32> %fcvtzs_vector
-}
-
-define <2 x i32> @fcvtzs_v2i32_from_f32_scalar_to_vector_simd(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i32_from_f32_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs s0, s0
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzs_v2i32_from_f32_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs s0, s0
-; CHECK-NEXT:    ret
-  %fcvtzs_scalar = fptosi float %a to i32
-  %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
-  ret <2 x i32> %fcvtzs_vector
-}
-
-define <2 x i32> @fcvtzs_v2i32_from_f64_scalar_to_vector_simd(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i32_from_f64_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, d0
-; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzs_v2i32_from_f64_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs s0, d0
-; CHECK-NEXT:    ret
-  %fcvtzs_scalar = fptosi double %a to i32
-  %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
-  ret <2 x i32> %fcvtzs_vector
-}
-
-define <4 x i32> @fcvtzs_v4i32_from_f16_scalar_to_vector_simd(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v4i32_from_f16_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, h0
-; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzs_v4i32_from_f16_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs s0, h0
-; CHECK-NEXT:    ret
-  %fcvtzs_scalar = fptosi half %a to i32
-  %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
-  ret <4 x i32> %fcvtzs_vector
-}
-
-define <4 x i32> @fcvtzs_v4i32_from_f32_scalar_to_vector_simd(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v4i32_from_f32_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs s0, s0
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzs_v4i32_from_f32_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs s0, s0
-; CHECK-NEXT:    ret
-  %fcvtzs_scalar = fptosi float %a to i32
-  %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
-  ret <4 x i32> %fcvtzs_vector
-}
-
-define <4 x i32> @fcvtzs_v4i32_from_f64_scalar_to_vector_simd(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v4i32_from_f64_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, d0
-; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzs_v4i32_from_f64_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs s0, d0
-; CHECK-NEXT:    ret
-  %fcvtzs_scalar = fptosi double %a to i32
-  %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
-  ret <4 x i32> %fcvtzs_vector
-}
-
-define <1 x i64> @fcvtzs_v1i64_from_f16_scalar_to_vector_simd(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v1i64_from_f16_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, h0
-; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzs_v1i64_from_f16_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs d0, h0
-; CHECK-NEXT:    ret
-  %fcvtzs_scalar = fptosi half %a to i64
-  %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
-  ret <1 x i64> %fcvtzs_vector
-}
-
-define <1 x i64> @fcvtzs_v1i64_from_f32_scalar_to_vector_simd(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v1i64_from_f32_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, s0
-; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzs_v1i64_from_f32_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs d0, s0
-; CHECK-NEXT:    ret
-  %fcvtzs_scalar = fptosi float %a to i64
-  %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
-  ret <1 x i64> %fcvtzs_vector
-}
-
-define <1 x i64> @fcvtzs_v1i64_from_f64_scalar_to_vector_simd(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v1i64_from_f64_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs d0, d0
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzs_v1i64_from_f64_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs d0, d0
-; CHECK-NEXT:    ret
-  %fcvtzs_scalar = fptosi double %a to i64
-  %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
-  ret <1 x i64> %fcvtzs_vector
-}
-
-define <2 x i64> @fcvtzs_v2i64_from_f16_scalar_to_vector_simd(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i64_from_f16_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, h0
-; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzs_v2i64_from_f16_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs d0, h0
-; CHECK-NEXT:    ret
-  %fcvtzs_scalar = fptosi half %a to i64
-  %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
-  ret <2 x i64> %fcvtzs_vector
-}
-
-define <2 x i64> @fcvtzs_v2i64_from_f32_scalar_to_vector_simd(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i64_from_f32_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, s0
-; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzs_v2i64_from_f32_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs d0, s0
-; CHECK-NEXT:    ret
-  %fcvtzs_scalar = fptosi float %a to i64
-  %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
-  ret <2 x i64> %fcvtzs_vector
-}
-
-define <2 x i64> @fcvtzs_v2i64_from_f64_scalar_to_vector_simd(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i64_from_f64_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzs d0, d0
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzs_v2i64_from_f64_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs d0, d0
-; CHECK-NEXT:    ret
-  %fcvtzs_scalar = fptosi double %a to i64
-  %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
-  ret <2 x i64> %fcvtzs_vector
-}
-
-define <2 x i32> @fcvtzu_v2i32_from_f16_scalar_to_vector_simd(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i32_from_f16_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, h0
-; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzu_v2i32_from_f16_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu s0, h0
-; CHECK-NEXT:    ret
-  %fcvtzu_scalar = fptoui half %a to i32
-  %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
-  ret <2 x i32> %fcvtzu_vector
-}
-
-define <2 x i32> @fcvtzu_v2i32_from_f32_scalar_to_vector_simd(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i32_from_f32_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu s0, s0
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzu_v2i32_from_f32_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu s0, s0
-; CHECK-NEXT:    ret
-  %fcvtzu_scalar = fptoui float %a to i32
-  %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
-  ret <2 x i32> %fcvtzu_vector
-}
-
-define <2 x i32> @fcvtzu_v2i32_from_f64_scalar_to_vector_simd(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i32_from_f64_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, d0
-; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzu_v2i32_from_f64_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu s0, d0
-; CHECK-NEXT:    ret
-  %fcvtzu_scalar = fptoui double %a to i32
-  %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
-  ret <2 x i32> %fcvtzu_vector
-}
-
-define <4 x i32> @fcvtzu_v4i32_from_f16_scalar_to_vector_simd(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v4i32_from_f16_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, h0
-; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzu_v4i32_from_f16_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu s0, h0
-; CHECK-NEXT:    ret
-  %fcvtzu_scalar = fptoui half %a to i32
-  %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
-  ret <4 x i32> %fcvtzu_vector
-}
-
-define <4 x i32> @fcvtzu_v4i32_from_f32_scalar_to_vector_simd(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v4i32_from_f32_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu s0, s0
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzu_v4i32_from_f32_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu s0, s0
-; CHECK-NEXT:    ret
-  %fcvtzu_scalar = fptoui float %a to i32
-  %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
-  ret <4 x i32> %fcvtzu_vector
-}
-
-define <4 x i32> @fcvtzu_v4i32_from_f64_scalar_to_vector_simd(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v4i32_from_f64_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, d0
-; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzu_v4i32_from_f64_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu s0, d0
-; CHECK-NEXT:    ret
-  %fcvtzu_scalar = fptoui double %a to i32
-  %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
-  ret <4 x i32> %fcvtzu_vector
-}
-
-define <1 x i64> @fcvtzu_v1i64_from_f16_scalar_to_vector_simd(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v1i64_from_f16_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, h0
-; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzu_v1i64_from_f16_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu d0, h0
-; CHECK-NEXT:    ret
-  %fcvtzu_scalar = fptoui half %a to i64
-  %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
-  ret <1 x i64> %fcvtzu_vector
-}
-
-define <1 x i64> @fcvtzu_v1i64_from_f32_scalar_to_vector_simd(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v1i64_from_f32_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, s0
-; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzu_v1i64_from_f32_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu d0, s0
-; CHECK-NEXT:    ret
-  %fcvtzu_scalar = fptoui float %a to i64
-  %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
-  ret <1 x i64> %fcvtzu_vector
-}
-
-define <1 x i64> @fcvtzu_v1i64_from_f64_scalar_to_vector_simd(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v1i64_from_f64_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu d0, d0
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzu_v1i64_from_f64_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu d0, d0
-; CHECK-NEXT:    ret
-  %fcvtzu_scalar = fptoui double %a to i64
-  %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
-  ret <1 x i64> %fcvtzu_vector
-}
-
-define <2 x i64> @fcvtzu_v2i64_from_f16_scalar_to_vector_simd(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i64_from_f16_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, h0
-; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzu_v2i64_from_f16_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu d0, h0
-; CHECK-NEXT:    ret
-  %fcvtzu_scalar = fptoui half %a to i64
-  %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
-  ret <2 x i64> %fcvtzu_vector
-}
-
-define <2 x i64> @fcvtzu_v2i64_from_f32_scalar_to_vector_simd(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i64_from_f32_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, s0
-; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzu_v2i64_from_f32_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu d0, s0
-; CHECK-NEXT:    ret
-  %fcvtzu_scalar = fptoui float %a to i64
-  %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
-  ret <2 x i64> %fcvtzu_vector
-}
-
-define <2 x i64> @fcvtzu_v2i64_from_f64_scalar_to_vector_simd(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i64_from_f64_scalar_to_vector_simd:
-; CHECK-NOFPRCVT:       // %bb.0:
-; CHECK-NOFPRCVT-NEXT:    fcvtzu d0, d0
-; CHECK-NOFPRCVT-NEXT:    ret
-;
-; CHECK-LABEL: fcvtzu_v2i64_from_f64_scalar_to_vector_simd:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu d0, d0
-; CHECK-NEXT:    ret
-  %fcvtzu_scalar = fptoui double %a to i64
-  %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
-  ret <2 x i64> %fcvtzu_vector
-}
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll b/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll
index cad3fb58086d6..b72fbe0a91684 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll
@@ -114,9 +114,9 @@ define <8x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8x i16> %c, <8x i16> %d )
 define <2x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2x i32> %c, <2x i32> %d ) {
 ; CHECK-LABEL: test_select_cc_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov s2, w1
-; CHECK-NEXT:    fmov s3, w0
-; CHECK-NEXT:    cmeq v2.2s, v3.2s, v2.2s
+; CHECK-NEXT:    fmov s2, w0
+; CHECK-NEXT:    fmov s3, w1
+; CHECK-NEXT:    cmeq v2.2s, v2.2s, v3.2s
 ; CHECK-NEXT:    dup v2.2s, v2.s[0]
 ; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT:    ret
@@ -128,9 +128,9 @@ define <2x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2x i32> %c, <2x i32> %d )
 define <4x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4x i32> %c, <4x i32> %d ) {
 ; CHECK-LABEL: test_select_cc_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov s2, w1
-; CHECK-NEXT:    fmov s3, w0
-; CHECK-NEXT:    cmeq v2.4s, v3.4s, v2.4s
+; CHECK-NEXT:    fmov s2, w0
+; CHECK-NEXT:    fmov s3, w1
+; CHECK-NEXT:    cmeq v2.4s, v2.4s, v3.4s
 ; CHECK-NEXT:    dup v2.4s, v2.s[0]
 ; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; CHECK-NEXT:    ret
@@ -155,9 +155,9 @@ define <1x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1x i64> %c, <1x i64> %d )
 define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d ) {
 ; CHECK-LABEL: test_select_cc_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov d2, x1
-; CHECK-NEXT:    fmov d3, x0
-; CHECK-NEXT:    cmeq v2.2d, v3.2d, v2.2d
+; CHECK-NEXT:    fmov d2, x0
+; CHECK-NEXT:    fmov d3, x1
+; CHECK-NEXT:    cmeq v2.2d, v2.2d, v3.2d
 ; CHECK-NEXT:    dup v2.2d, v2.d[0]
 ; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; CHECK-NEXT:    ret
@@ -210,9 +210,9 @@ define <4x float> @test_select_cc_v4f32(float %a, float %b, <4x float> %c, <4x f
 define <4x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4x float> %c, <4x float> %d ) {
 ; CHECK-LABEL: test_select_cc_v4f32_icmp:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov s2, w1
-; CHECK-NEXT:    fmov s3, w0
-; CHECK-NEXT:    cmeq v2.4s, v3.4s, v2.4s
+; CHECK-NEXT:    fmov s2, w0
+; CHECK-NEXT:    fmov s3, w1
+; CHECK-NEXT:    cmeq v2.4s, v2.4s, v3.4s
 ; CHECK-NEXT:    dup v2.4s, v2.s[0]
 ; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
index cb14adc00df00..2b567d8f148f7 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
@@ -405,15 +405,25 @@ define i16 @test_sqrdmlah_v1i16(i16 %acc, i16 %x, i16 %y) {
 }
 
 define i32 @test_sqrdmlah_v1i32(i32 %acc, i32 %x, i32 %y) {
-; CHECK-LABEL: test_sqrdmlah_v1i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov s0, w1
-; CHECK-NEXT:    fmov s1, w2
-; CHECK-NEXT:    sqrdmulh v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    fmov s1, w0
-; CHECK-NEXT:    sqadd v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_sqrdmlah_v1i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fmov s0, w2
+; CHECK-SD-NEXT:    fmov s1, w1
+; CHECK-SD-NEXT:    sqrdmulh v0.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT:    fmov s1, w0
+; CHECK-SD-NEXT:    sqadd v0.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT:    fmov w0, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_sqrdmlah_v1i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fmov s0, w1
+; CHECK-GI-NEXT:    fmov s1, w2
+; CHECK-GI-NEXT:    sqrdmulh v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    fmov s1, w0
+; CHECK-GI-NEXT:    sqadd v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    ret
   %x_vec = insertelement <4 x i32> undef, i32 %x, i64 0
   %y_vec = insertelement <4 x i32> undef, i32 %y, i64 0
   %prod_vec = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x_vec,  <4 x i32> %y_vec)
@@ -444,15 +454,25 @@ define i16 @test_sqrdmlsh_v1i16(i16 %acc, i16 %x, i16 %y) {
 }
 
 define i32 @test_sqrdmlsh_v1i32(i32 %acc, i32 %x, i32 %y) {
-; CHECK-LABEL: test_sqrdmlsh_v1i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov s0, w1
-; CHECK-NEXT:    fmov s1, w2
-; CHECK-NEXT:    sqrdmulh v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    fmov s1, w0
-; CHECK-NEXT:    sqsub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_sqrdmlsh_v1i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fmov s0, w2
+; CHECK-SD-NEXT:    fmov s1, w1
+; CHECK-SD-NEXT:    sqrdmulh v0.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT:    fmov s1, w0
+; CHECK-SD-NEXT:    sqsub v0.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT:    fmov w0, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_sqrdmlsh_v1i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fmov s0, w1
+; CHECK-GI-NEXT:    fmov s1, w2
+; CHECK-GI-NEXT:    sqrdmulh v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    fmov s1, w0
+; CHECK-GI-NEXT:    sqsub v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    ret
   %x_vec = insertelement <4 x i32> undef, i32 %x, i64 0
   %y_vec = insertelement <4 x i32> undef, i32 %y, i64 0
   %prod_vec = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x_vec,  <4 x i32> %y_vec)
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
index 9743639d99d9b..360183b5006b6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -2555,8 +2555,7 @@ define <1 x i64> @neon_ushl_vscalar_constant_shift(ptr %A) nounwind {
 define i64 @neon_ushl_scalar_constant_shift(ptr %A) nounwind {
 ; CHECK-SD-LABEL: neon_ushl_scalar_constant_shift:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ldr w8, [x0]
-; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    ldr s0, [x0]
 ; CHECK-SD-NEXT:    shl d0, d0, #1
 ; CHECK-SD-NEXT:    fmov x0, d0
 ; CHECK-SD-NEXT:    ret
@@ -2848,8 +2847,7 @@ define <1 x i64> @neon_sshll_vscalar_constant_shift(ptr %A) nounwind {
 define i64 @neon_sshll_scalar_constant_shift(ptr %A) nounwind {
 ; CHECK-SD-LABEL: neon_sshll_scalar_constant_shift:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ldr w8, [x0]
-; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    ldr s0, [x0]
 ; CHECK-SD-NEXT:    shl d0, d0, #1
 ; CHECK-SD-NEXT:    fmov x0, d0
 ; CHECK-SD-NEXT:    ret
@@ -2872,8 +2870,7 @@ define i64 @neon_sshll_scalar_constant_shift(ptr %A) nounwind {
 define i64 @neon_sshll_scalar_constant_shift_m1(ptr %A) nounwind {
 ; CHECK-SD-LABEL: neon_sshll_scalar_constant_shift_m1:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ldr w8, [x0]
-; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    ldr s0, [x0]
 ; CHECK-SD-NEXT:    sshr d0, d0, #1
 ; CHECK-SD-NEXT:    fmov x0, d0
 ; CHECK-SD-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/avoid-pre-trunc.ll b/llvm/test/CodeGen/AArch64/avoid-pre-trunc.ll
index d7a2a83cf3660..bd9162e36f299 100644
--- a/llvm/test/CodeGen/AArch64/avoid-pre-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/avoid-pre-trunc.ll
@@ -107,20 +107,20 @@ define <4 x i32> @lower_trunc_4xi32(i64 %a, i64 %b, i64 %c, i64 %d) {
 define <8 x i32> @lower_trunc_8xi32(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h) {
 ; CHECK-LABEL: lower_trunc_8xi32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov d0, x6
-; CHECK-NEXT:    fmov d1, x4
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    fmov d1, x6
 ; CHECK-NEXT:    fmov d2, x2
-; CHECK-NEXT:    fmov d3, x0
-; CHECK-NEXT:    mov v0.d[1], x7
-; CHECK-NEXT:    mov v1.d[1], x5
+; CHECK-NEXT:    fmov d3, x4
+; CHECK-NEXT:    mov v1.d[1], x7
 ; CHECK-NEXT:    mov v2.d[1], x3
-; CHECK-NEXT:    mov v3.d[1], x1
-; CHECK-NEXT:    uzp1 v1.4s, v1.4s, v0.4s
-; CHECK-NEXT:    uzp1 v2.4s, v3.4s, v2.4s
-; CHECK-NEXT:    add v3.4s, v1.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v2.4s, v2.4s
-; CHECK-NEXT:    eor v1.16b, v1.16b, v3.16b
-; CHECK-NEXT:    eor v0.16b, v2.16b, v0.16b
+; CHECK-NEXT:    mov v0.d[1], x1
+; CHECK-NEXT:    mov v3.d[1], x5
+; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    uzp1 v1.4s, v3.4s, v1.4s
+; CHECK-NEXT:    add v3.4s, v0.4s, v0.4s
+; CHECK-NEXT:    add v2.4s, v1.4s, v1.4s
+; CHECK-NEXT:    eor v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    eor v1.16b, v1.16b, v2.16b
 ; CHECK-NEXT:    ret
   %a1 = insertelement <8 x i64> poison, i64 %a, i64 0
   %b1 = insertelement <8 x i64> %a1, i64 %b, i64 1
diff --git a/llvm/test/CodeGen/AArch64/concat-vector-add-combine.ll b/llvm/test/CodeGen/AArch64/concat-vector-add-combine.ll
index 545da98034527..171f74149c905 100644
--- a/llvm/test/CodeGen/AArch64/concat-vector-add-combine.ll
+++ b/llvm/test/CodeGen/AArch64/concat-vector-add-combine.ll
@@ -60,16 +60,16 @@ define i16 @combine_add_16xi16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i
 define i32 @combine_add_8xi32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) local_unnamed_addr #0 {
 ; CHECK-LABEL: combine_add_8xi32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov s0, w4
-; CHECK-NEXT:    fmov s1, w0
-; CHECK-NEXT:    mov v0.s[1], w5
-; CHECK-NEXT:    mov v1.s[1], w1
-; CHECK-NEXT:    mov v0.s[2], w6
-; CHECK-NEXT:    mov v1.s[2], w2
-; CHECK-NEXT:    mov v0.s[3], w7
-; CHECK-NEXT:    mov v1.s[3], w3
-; CHECK-NEXT:    uzp2 v2.8h, v1.8h, v0.8h
-; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    fmov s0, w0
+; CHECK-NEXT:    fmov s1, w4
+; CHECK-NEXT:    mov v1.s[1], w5
+; CHECK-NEXT:    mov v0.s[1], w1
+; CHECK-NEXT:    mov v1.s[2], w6
+; CHECK-NEXT:    mov v0.s[2], w2
+; CHECK-NEXT:    mov v1.s[3], w7
+; CHECK-NEXT:    mov v0.s[3], w3
+; CHECK-NEXT:    uzp2 v2.8h, v0.8h, v1.8h
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    uhadd v0.8h, v0.8h, v2.8h
 ; CHECK-NEXT:    uaddlv s0, v0.8h
 ; CHECK-NEXT:    fmov w0, s0
diff --git a/llvm/test/CodeGen/AArch64/ctpop.ll b/llvm/test/CodeGen/AArch64/ctpop.ll
index 9c59f1b233b5d..b7dfb79477b64 100644
--- a/llvm/test/CodeGen/AArch64/ctpop.ll
+++ b/llvm/test/CodeGen/AArch64/ctpop.ll
@@ -395,24 +395,24 @@ entry:
 define <3 x i128> @v3i128(<3 x i128> %d) {
 ; CHECK-SD-LABEL: v3i128:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    fmov d0, x4
+; CHECK-SD-NEXT:    fmov d0, x0
 ; CHECK-SD-NEXT:    fmov d1, x2
-; CHECK-SD-NEXT:    fmov d2, x0
-; CHECK-SD-NEXT:    mov v0.d[1], x5
+; CHECK-SD-NEXT:    fmov d2, x4
+; CHECK-SD-NEXT:    mov v2.d[1], x5
 ; CHECK-SD-NEXT:    mov v1.d[1], x3
-; CHECK-SD-NEXT:    mov v2.d[1], x1
+; CHECK-SD-NEXT:    mov v0.d[1], x1
 ; CHECK-SD-NEXT:    mov x1, xzr
 ; CHECK-SD-NEXT:    mov x3, xzr
 ; CHECK-SD-NEXT:    mov x5, xzr
-; CHECK-SD-NEXT:    cnt v0.16b, v0.16b
-; CHECK-SD-NEXT:    cnt v1.16b, v1.16b
 ; CHECK-SD-NEXT:    cnt v2.16b, v2.16b
-; CHECK-SD-NEXT:    addv b0, v0.16b
-; CHECK-SD-NEXT:    addv b1, v1.16b
+; CHECK-SD-NEXT:    cnt v1.16b, v1.16b
+; CHECK-SD-NEXT:    cnt v0.16b, v0.16b
 ; CHECK-SD-NEXT:    addv b2, v2.16b
-; CHECK-SD-NEXT:    fmov x0, d2
+; CHECK-SD-NEXT:    addv b1, v1.16b
+; CHECK-SD-NEXT:    addv b0, v0.16b
+; CHECK-SD-NEXT:    fmov x0, d0
 ; CHECK-SD-NEXT:    fmov x2, d1
-; CHECK-SD-NEXT:    fmov x4, d0
+; CHECK-SD-NEXT:    fmov x4, d2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v3i128:
@@ -444,30 +444,30 @@ entry:
 define <4 x i128> @v4i128(<4 x i128> %d) {
 ; CHECK-SD-LABEL: v4i128:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    fmov d0, x6
-; CHECK-SD-NEXT:    fmov d1, x4
-; CHECK-SD-NEXT:    fmov d2, x2
-; CHECK-SD-NEXT:    fmov d3, x0
-; CHECK-SD-NEXT:    mov v1.d[1], x5
-; CHECK-SD-NEXT:    mov v2.d[1], x3
-; CHECK-SD-NEXT:    mov v0.d[1], x7
-; CHECK-SD-NEXT:    mov v3.d[1], x1
+; CHECK-SD-NEXT:    fmov d0, x0
+; CHECK-SD-NEXT:    fmov d1, x2
+; CHECK-SD-NEXT:    fmov d2, x4
+; CHECK-SD-NEXT:    fmov d3, x6
+; CHECK-SD-NEXT:    mov v2.d[1], x5
+; CHECK-SD-NEXT:    mov v1.d[1], x3
+; CHECK-SD-NEXT:    mov v0.d[1], x1
+; CHECK-SD-NEXT:    mov v3.d[1], x7
 ; CHECK-SD-NEXT:    mov x1, xzr
 ; CHECK-SD-NEXT:    mov x3, xzr
 ; CHECK-SD-NEXT:    mov x5, xzr
 ; CHECK-SD-NEXT:    mov x7, xzr
-; CHECK-SD-NEXT:    cnt v1.16b, v1.16b
 ; CHECK-SD-NEXT:    cnt v2.16b, v2.16b
+; CHECK-SD-NEXT:    cnt v1.16b, v1.16b
 ; CHECK-SD-NEXT:    cnt v0.16b, v0.16b
 ; CHECK-SD-NEXT:    cnt v3.16b, v3.16b
-; CHECK-SD-NEXT:    addv b1, v1.16b
 ; CHECK-SD-NEXT:    addv b2, v2.16b
+; CHECK-SD-NEXT:    addv b1, v1.16b
 ; CHECK-SD-NEXT:    addv b0, v0.16b
 ; CHECK-SD-NEXT:    addv b3, v3.16b
-; CHECK-SD-NEXT:    fmov x2, d2
-; CHECK-SD-NEXT:    fmov x4, d1
-; CHECK-SD-NEXT:    fmov x6, d0
-; CHECK-SD-NEXT:    fmov x0, d3
+; CHECK-SD-NEXT:    fmov x0, d0
+; CHECK-SD-NEXT:    fmov x2, d1
+; CHECK-SD-NEXT:    fmov x4, d2
+; CHECK-SD-NEXT:    fmov x6, d3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v4i128:
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index 2417205759767..05e4b414dc072 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -1161,24 +1161,17 @@ declare <7 x i32> @llvm.fptosi.sat.v7f16.v7i32 (<7 x half>)
 declare <8 x i32> @llvm.fptosi.sat.v8f16.v8i32 (<8 x half>)
 
 define <1 x i32> @test_signed_v1f16_v1i32(<1 x half> %f) {
-; CHECK-SD-CVT-LABEL: test_signed_v1f16_v1i32:
-; CHECK-SD-CVT:       // %bb.0:
-; CHECK-SD-CVT-NEXT:    fcvt s0, h0
-; CHECK-SD-CVT-NEXT:    fcvtzs w8, s0
-; CHECK-SD-CVT-NEXT:    fmov s0, w8
-; CHECK-SD-CVT-NEXT:    ret
+; CHECK-CVT-LABEL: test_signed_v1f16_v1i32:
+; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    fcvt s0, h0
+; CHECK-CVT-NEXT:    fcvtzs s0, s0
+; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_signed_v1f16_v1i32:
 ; CHECK-FP16:       // %bb.0:
 ; CHECK-FP16-NEXT:    fcvtzs w8, h0
 ; CHECK-FP16-NEXT:    fmov s0, w8
 ; CHECK-FP16-NEXT:    ret
-;
-; CHECK-GI-CVT-LABEL: test_signed_v1f16_v1i32:
-; CHECK-GI-CVT:       // %bb.0:
-; CHECK-GI-CVT-NEXT:    fcvt s0, h0
-; CHECK-GI-CVT-NEXT:    fcvtzs s0, s0
-; CHECK-GI-CVT-NEXT:    ret
     %x = call <1 x i32> @llvm.fptosi.sat.v1f16.v1i32(<1 x half> %f)
     ret <1 x i32> %x
 }
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index ecca1165753bf..e461ace0ea0a2 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -992,24 +992,17 @@ declare <7 x i32> @llvm.fptoui.sat.v7f16.v7i32 (<7 x half>)
 declare <8 x i32> @llvm.fptoui.sat.v8f16.v8i32 (<8 x half>)
 
 define <1 x i32> @test_unsigned_v1f16_v1i32(<1 x half> %f) {
-; CHECK-SD-CVT-LABEL: test_unsigned_v1f16_v1i32:
-; CHECK-SD-CVT:       // %bb.0:
-; CHECK-SD-CVT-NEXT:    fcvt s0, h0
-; CHECK-SD-CVT-NEXT:    fcvtzu w8, s0
-; CHECK-SD-CVT-NEXT:    fmov s0, w8
-; CHECK-SD-CVT-NEXT:    ret
+; CHECK-CVT-LABEL: test_unsigned_v1f16_v1i32:
+; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    fcvt s0, h0
+; CHECK-CVT-NEXT:    fcvtzu s0, s0
+; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_unsigned_v1f16_v1i32:
 ; CHECK-FP16:       // %bb.0:
 ; CHECK-FP16-NEXT:    fcvtzu w8, h0
 ; CHECK-FP16-NEXT:    fmov s0, w8
 ; CHECK-FP16-NEXT:    ret
-;
-; CHECK-GI-CVT-LABEL: test_unsigned_v1f16_v1i32:
-; CHECK-GI-CVT:       // %bb.0:
-; CHECK-GI-CVT-NEXT:    fcvt s0, h0
-; CHECK-GI-CVT-NEXT:    fcvtzu s0, s0
-; CHECK-GI-CVT-NEXT:    ret
     %x = call <1 x i32> @llvm.fptoui.sat.v1f16.v1i32(<1 x half> %f)
     ret <1 x i32> %x
 }
diff --git a/llvm/test/CodeGen/AArch64/fsh.ll b/llvm/test/CodeGen/AArch64/fsh.ll
index 1db776ea6f616..3ccbdb6b9d30a 100644
--- a/llvm/test/CodeGen/AArch64/fsh.ll
+++ b/llvm/test/CodeGen/AArch64/fsh.ll
@@ -2611,25 +2611,25 @@ define <7 x i32> @fshr_v7i32(<7 x i32> %a, <7 x i32> %b, <7 x i32> %c) {
 ; CHECK-SD-NEXT:    mov v3.s[2], w6
 ; CHECK-SD-NEXT:    ld1 { v4.s }[2], [x9]
 ; CHECK-SD-NEXT:    ld1 { v6.s }[1], [x8]
-; CHECK-SD-NEXT:    bic v16.16b, v5.16b, v2.16b
-; CHECK-SD-NEXT:    and v2.16b, v2.16b, v5.16b
+; CHECK-SD-NEXT:    and v16.16b, v2.16b, v5.16b
 ; CHECK-SD-NEXT:    add x8, sp, #40
 ; CHECK-SD-NEXT:    add x9, sp, #16
+; CHECK-SD-NEXT:    bic v2.16b, v5.16b, v2.16b
 ; CHECK-SD-NEXT:    mov v1.s[3], w3
 ; CHECK-SD-NEXT:    and v7.16b, v0.16b, v5.16b
 ; CHECK-SD-NEXT:    bic v0.16b, v5.16b, v0.16b
 ; CHECK-SD-NEXT:    ld1 { v4.s }[3], [x9]
 ; CHECK-SD-NEXT:    ld1 { v6.s }[2], [x8]
 ; CHECK-SD-NEXT:    add v3.4s, v3.4s, v3.4s
-; CHECK-SD-NEXT:    neg v2.4s, v2.4s
 ; CHECK-SD-NEXT:    neg v5.4s, v7.4s
+; CHECK-SD-NEXT:    neg v7.4s, v16.4s
 ; CHECK-SD-NEXT:    add v1.4s, v1.4s, v1.4s
-; CHECK-SD-NEXT:    ushl v3.4s, v3.4s, v16.4s
-; CHECK-SD-NEXT:    ushl v2.4s, v6.4s, v2.4s
+; CHECK-SD-NEXT:    ushl v4.4s, v4.4s, v5.4s
 ; CHECK-SD-NEXT:    ushl v0.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT:    ushl v1.4s, v4.4s, v5.4s
-; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT:    orr v1.16b, v3.16b, v2.16b
+; CHECK-SD-NEXT:    ushl v1.4s, v3.4s, v2.4s
+; CHECK-SD-NEXT:    ushl v2.4s, v6.4s, v7.4s
+; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v4.16b
+; CHECK-SD-NEXT:    orr v1.16b, v1.16b, v2.16b
 ; CHECK-SD-NEXT:    mov w1, v0.s[1]
 ; CHECK-SD-NEXT:    mov w2, v0.s[2]
 ; CHECK-SD-NEXT:    mov w3, v0.s[3]
diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
index ae71cd00b9aa4..b94a26fd0e80b 100644
--- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
+++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
@@ -25,7 +25,7 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
 ; CHECK-NEXT:    .cfi_offset b13, -64
 ; CHECK-NEXT:    .cfi_offset b14, -72
 ; CHECK-NEXT:    .cfi_offset b15, -80
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    movi v7.2d, #0000000000000000
 ; CHECK-NEXT:    adrp x14, B+48
 ; CHECK-NEXT:    add x14, x14, :lo12:B+48
 ; CHECK-NEXT:    // implicit-def: $q18
@@ -43,15 +43,15 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
 ; CHECK-NEXT:    // implicit-def: $q3
 ; CHECK-NEXT:    // implicit-def: $q4
 ; CHECK-NEXT:    // implicit-def: $q5
-; CHECK-NEXT:    // implicit-def: $q6
+; CHECK-NEXT:    // implicit-def: $q1
 ; CHECK-NEXT:    // implicit-def: $q16
 ; CHECK-NEXT:    // implicit-def: $q17
-; CHECK-NEXT:    // implicit-def: $q7
+; CHECK-NEXT:    // implicit-def: $q6
 ; CHECK-NEXT:    // implicit-def: $q19
 ; CHECK-NEXT:    // implicit-def: $q20
 ; CHECK-NEXT:    // implicit-def: $q21
 ; CHECK-NEXT:    // implicit-def: $q22
-; CHECK-NEXT:    // implicit-def: $q24
+; CHECK-NEXT:    // implicit-def: $q12
 ; CHECK-NEXT:    // implicit-def: $q23
 ; CHECK-NEXT:    // implicit-def: $q25
 ; CHECK-NEXT:    // implicit-def: $q26
@@ -59,7 +59,7 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
 ; CHECK-NEXT:    // implicit-def: $q30
 ; CHECK-NEXT:    // implicit-def: $q8
 ; CHECK-NEXT:    // implicit-def: $q11
-; CHECK-NEXT:    // implicit-def: $q12
+; CHECK-NEXT:    // implicit-def: $q28
 ; CHECK-NEXT:    // implicit-def: $q29
 ; CHECK-NEXT:    // implicit-def: $q13
 ; CHECK-NEXT:    // implicit-def: $q10
@@ -69,111 +69,107 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
 ; CHECK-NEXT:    // kill: killed $q18
 ; CHECK-NEXT:  .LBB0_1: // %for.cond1.preheader
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldr x17, [x8]
+; CHECK-NEXT:    ldr x18, [x8]
 ; CHECK-NEXT:    ldr x15, [x8]
 ; CHECK-NEXT:    mov v18.16b, v0.16b
 ; CHECK-NEXT:    ldr x16, [x9]
 ; CHECK-NEXT:    stp q15, q4, [sp] // 32-byte Folded Spill
 ; CHECK-NEXT:    add x5, x10, x11
-; CHECK-NEXT:    mul x1, x15, x17
+; CHECK-NEXT:    mul x1, x15, x18
 ; CHECK-NEXT:    ldr x2, [x13], #64
-; CHECK-NEXT:    ldr x5, [x5, #128]
-; CHECK-NEXT:    stp q7, q23, [sp, #32] // 32-byte Folded Spill
-; CHECK-NEXT:    ldr x14, [x14, #8]
-; CHECK-NEXT:    mul x0, x17, x17
+; CHECK-NEXT:    stp q6, q23, [sp, #32] // 32-byte Folded Spill
 ; CHECK-NEXT:    ldr q23, [sp, #80] // 16-byte Reload
+; CHECK-NEXT:    ldr x14, [x14, #8]
+; CHECK-NEXT:    mul x0, x18, x18
+; CHECK-NEXT:    ldr x5, [x5, #128]
 ; CHECK-NEXT:    mov v9.16b, v30.16b
 ; CHECK-NEXT:    mov v30.16b, v25.16b
 ; CHECK-NEXT:    mov v25.16b, v20.16b
-; CHECK-NEXT:    mov v20.16b, v6.16b
-; CHECK-NEXT:    mul x18, x16, x17
-; CHECK-NEXT:    mov v6.16b, v1.16b
-; CHECK-NEXT:    mov v28.16b, v24.16b
-; CHECK-NEXT:    fmov d14, x1
-; CHECK-NEXT:    mov v24.16b, v19.16b
-; CHECK-NEXT:    mov v19.16b, v5.16b
-; CHECK-NEXT:    mul x4, x2, x17
+; CHECK-NEXT:    mov v20.16b, v1.16b
+; CHECK-NEXT:    mul x17, x16, x18
 ; CHECK-NEXT:    mov v31.16b, v26.16b
 ; CHECK-NEXT:    mov v26.16b, v21.16b
-; CHECK-NEXT:    fmov d15, x0
+; CHECK-NEXT:    fmov d14, x1
 ; CHECK-NEXT:    mov v21.16b, v16.16b
 ; CHECK-NEXT:    mov v16.16b, v2.16b
-; CHECK-NEXT:    mov v0.16b, v14.16b
-; CHECK-NEXT:    mul x20, x2, x5
-; CHECK-NEXT:    mov v7.16b, v10.16b
+; CHECK-NEXT:    mul x4, x2, x18
+; CHECK-NEXT:    mov v6.16b, v10.16b
 ; CHECK-NEXT:    mov v10.16b, v17.16b
+; CHECK-NEXT:    fmov d15, x0
 ; CHECK-NEXT:    mov v17.16b, v3.16b
+; CHECK-NEXT:    mov v24.16b, v19.16b
+; CHECK-NEXT:    mov v0.16b, v14.16b
+; CHECK-NEXT:    mul x3, x14, x18
+; CHECK-NEXT:    mov v19.16b, v5.16b
 ; CHECK-NEXT:    add x11, x11, #8
-; CHECK-NEXT:    mov v15.d[1], x18
-; CHECK-NEXT:    mul x3, x14, x17
+; CHECK-NEXT:    add x12, x12, #1
+; CHECK-NEXT:    mov v15.d[1], x17
+; CHECK-NEXT:    mul x6, x15, x15
 ; CHECK-NEXT:    cmp x11, #64
 ; CHECK-NEXT:    mov v0.d[1], x1
 ; CHECK-NEXT:    fmov d1, x4
-; CHECK-NEXT:    add x12, x12, #1
-; CHECK-NEXT:    mul x17, x17, x5
-; CHECK-NEXT:    fmov d5, x20
-; CHECK-NEXT:    mul x6, x15, x15
+; CHECK-NEXT:    mul x7, x15, x5
+; CHECK-NEXT:    mul x18, x18, x5
+; CHECK-NEXT:    mov v1.d[1], x3
 ; CHECK-NEXT:    add v23.2d, v23.2d, v0.2d
 ; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Reload
-; CHECK-NEXT:    mov v1.d[1], x3
-; CHECK-NEXT:    mul x7, x15, x5
-; CHECK-NEXT:    add v0.2d, v0.2d, v15.2d
-; CHECK-NEXT:    fmov d2, x17
-; CHECK-NEXT:    mul x0, x14, x5
 ; CHECK-NEXT:    fmov d4, x6
+; CHECK-NEXT:    mul x20, x2, x5
+; CHECK-NEXT:    add v0.2d, v0.2d, v15.2d
+; CHECK-NEXT:    fmov d3, x7
 ; CHECK-NEXT:    mul x19, x16, x5
+; CHECK-NEXT:    mov v4.d[1], x6
+; CHECK-NEXT:    fmov d2, x18
+; CHECK-NEXT:    mul x0, x14, x5
 ; CHECK-NEXT:    stp q0, q23, [sp, #64] // 32-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #96] // 16-byte Reload
-; CHECK-NEXT:    fmov d3, x7
+; CHECK-NEXT:    fmov d5, x20
+; CHECK-NEXT:    mov v3.d[1], x7
 ; CHECK-NEXT:    ldr q23, [sp, #48] // 16-byte Reload
 ; CHECK-NEXT:    mul x17, x2, x15
 ; CHECK-NEXT:    add v0.2d, v0.2d, v15.2d
 ; CHECK-NEXT:    ldr q15, [sp] // 16-byte Reload
-; CHECK-NEXT:    mov v5.d[1], x0
-; CHECK-NEXT:    mov v4.d[1], x6
+; CHECK-NEXT:    mov v2.d[1], x19
+; CHECK-NEXT:    add v13.2d, v13.2d, v4.2d
+; CHECK-NEXT:    add v12.2d, v12.2d, v4.2d
 ; CHECK-NEXT:    mul x16, x16, x15
-; CHECK-NEXT:    mov v3.d[1], x7
 ; CHECK-NEXT:    add v15.2d, v15.2d, v1.2d
-; CHECK-NEXT:    mov v2.d[1], x19
+; CHECK-NEXT:    mov v1.16b, v20.16b
+; CHECK-NEXT:    mov v5.d[1], x0
 ; CHECK-NEXT:    str q0, [sp, #96] // 16-byte Spill
-; CHECK-NEXT:    mov v1.16b, v6.16b
-; CHECK-NEXT:    mul x14, x14, x15
-; CHECK-NEXT:    mov v6.16b, v20.16b
 ; CHECK-NEXT:    mov v20.16b, v25.16b
-; CHECK-NEXT:    fmov d0, x17
+; CHECK-NEXT:    mul x14, x14, x15
 ; CHECK-NEXT:    mov v25.16b, v30.16b
-; CHECK-NEXT:    add v30.2d, v9.2d, v5.2d
-; CHECK-NEXT:    mov v5.16b, v19.16b
-; CHECK-NEXT:    mov v19.16b, v24.16b
 ; CHECK-NEXT:    add v11.2d, v11.2d, v3.2d
-; CHECK-NEXT:    mov v14.d[1], x16
+; CHECK-NEXT:    fmov d0, x17
 ; CHECK-NEXT:    mov v3.16b, v17.16b
 ; CHECK-NEXT:    mov v17.16b, v10.16b
-; CHECK-NEXT:    mov v10.16b, v7.16b
+; CHECK-NEXT:    mov v10.16b, v6.16b
 ; CHECK-NEXT:    add v8.2d, v8.2d, v2.2d
 ; CHECK-NEXT:    mov v2.16b, v16.16b
-; CHECK-NEXT:    mov v0.d[1], x14
+; CHECK-NEXT:    mov v14.d[1], x16
 ; CHECK-NEXT:    mov v16.16b, v21.16b
 ; CHECK-NEXT:    mov v21.16b, v26.16b
-; CHECK-NEXT:    add v13.2d, v13.2d, v4.2d
+; CHECK-NEXT:    add v30.2d, v9.2d, v5.2d
+; CHECK-NEXT:    mov v5.16b, v19.16b
 ; CHECK-NEXT:    add v26.2d, v31.2d, v4.2d
-; CHECK-NEXT:    add v24.2d, v28.2d, v4.2d
-; CHECK-NEXT:    add v19.2d, v19.2d, v4.2d
-; CHECK-NEXT:    add v6.2d, v6.2d, v4.2d
+; CHECK-NEXT:    mov v0.d[1], x14
+; CHECK-NEXT:    add v19.2d, v24.2d, v4.2d
 ; CHECK-NEXT:    add v1.2d, v1.2d, v4.2d
-; CHECK-NEXT:    ldp q4, q7, [sp, #16] // 32-byte Folded Reload
+; CHECK-NEXT:    add v7.2d, v7.2d, v4.2d
+; CHECK-NEXT:    ldp q4, q6, [sp, #16] // 32-byte Folded Reload
 ; CHECK-NEXT:    add v10.2d, v10.2d, v14.2d
 ; CHECK-NEXT:    add v29.2d, v29.2d, v14.2d
 ; CHECK-NEXT:    add v27.2d, v27.2d, v14.2d
 ; CHECK-NEXT:    add v23.2d, v23.2d, v14.2d
 ; CHECK-NEXT:    add v22.2d, v22.2d, v14.2d
 ; CHECK-NEXT:    add v20.2d, v20.2d, v14.2d
+; CHECK-NEXT:    add v6.2d, v6.2d, v14.2d
 ; CHECK-NEXT:    add v16.2d, v16.2d, v14.2d
-; CHECK-NEXT:    add v7.2d, v7.2d, v14.2d
 ; CHECK-NEXT:    add v5.2d, v5.2d, v14.2d
 ; CHECK-NEXT:    add v3.2d, v3.2d, v14.2d
 ; CHECK-NEXT:    add v2.2d, v2.2d, v14.2d
-; CHECK-NEXT:    add v12.2d, v12.2d, v0.2d
+; CHECK-NEXT:    add v28.2d, v28.2d, v0.2d
 ; CHECK-NEXT:    add v25.2d, v25.2d, v0.2d
 ; CHECK-NEXT:    add v21.2d, v21.2d, v0.2d
 ; CHECK-NEXT:    add v17.2d, v17.2d, v0.2d
@@ -182,30 +178,30 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
 ; CHECK-NEXT:    mov x14, x13
 ; CHECK-NEXT:    b.ne .LBB0_1
 ; CHECK-NEXT:  // %bb.2: // %for.cond.cleanup
-; CHECK-NEXT:    ldp q28, q18, [sp, #64] // 32-byte Folded Reload
+; CHECK-NEXT:    ldp q24, q18, [sp, #64] // 32-byte Folded Reload
 ; CHECK-NEXT:    adrp x8, C
 ; CHECK-NEXT:    add x8, x8, :lo12:C
 ; CHECK-NEXT:    ldp x20, x19, [sp, #176] // 16-byte Folded Reload
 ; CHECK-NEXT:    stp q10, q13, [x8, #64]
-; CHECK-NEXT:    stp q28, q18, [x8]
+; CHECK-NEXT:    stp q24, q18, [x8]
 ; CHECK-NEXT:    ldr q18, [sp, #96] // 16-byte Reload
-; CHECK-NEXT:    stp q29, q12, [x8, #96]
-; CHECK-NEXT:    ldp d13, d12, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT:    stp q29, q28, [x8, #96]
 ; CHECK-NEXT:    stp q18, q15, [x8, #32]
 ; CHECK-NEXT:    ldp d15, d14, [sp, #112] // 16-byte Folded Reload
 ; CHECK-NEXT:    stp q11, q8, [x8, #144]
 ; CHECK-NEXT:    ldp d9, d8, [sp, #160] // 16-byte Folded Reload
-; CHECK-NEXT:    stp q30, q27, [x8, #176]
+; CHECK-NEXT:    stp q12, q22, [x8, #272]
 ; CHECK-NEXT:    ldp d11, d10, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT:    stp q30, q27, [x8, #176]
 ; CHECK-NEXT:    str q26, [x8, #208]
 ; CHECK-NEXT:    stp q25, q23, [x8, #240]
-; CHECK-NEXT:    stp q24, q22, [x8, #272]
 ; CHECK-NEXT:    stp q21, q20, [x8, #304]
-; CHECK-NEXT:    stp q19, q7, [x8, #336]
+; CHECK-NEXT:    stp q19, q6, [x8, #336]
 ; CHECK-NEXT:    stp q17, q16, [x8, #368]
-; CHECK-NEXT:    stp q6, q5, [x8, #400]
+; CHECK-NEXT:    stp q1, q5, [x8, #400]
 ; CHECK-NEXT:    stp q4, q3, [x8, #432]
-; CHECK-NEXT:    stp q1, q2, [x8, #464]
+; CHECK-NEXT:    stp q7, q2, [x8, #464]
 ; CHECK-NEXT:    str q0, [x8, #496]
 ; CHECK-NEXT:    add sp, sp, #192
 ; CHECK-NEXT:    .cfi_def_cfa_offset 0
diff --git a/llvm/test/CodeGen/AArch64/scalar-to-vector-bitcasts.ll b/llvm/test/CodeGen/AArch64/scalar-to-vector-bitcasts.ll
new file mode 100644
index 0000000000000..3e7e4e741622c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/scalar-to-vector-bitcasts.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fullfp16,+fprcvt | FileCheck %s
+
+; These tests ensure bitcasts are correctly emitted for scalar_to_vector
+; by checking if NEON variant of conversion instruction was selected
+
+define <2 x i32> @fcvtzs_v2i32_scalar_to_vector(float %a) {
+; CHECK-LABEL: fcvtzs_v2i32_scalar_to_vector:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
+  %c = fptosi float %a to i32
+  %v = insertelement <2 x i32> poison, i32 %c, i32 0
+  ret <2 x i32> %v
+}
+
+define <4 x i32> @fcvtzs_v4i32_scalar_to_vector(double %a) {
+; CHECK-LABEL: fcvtzs_v4i32_scalar_to_vector:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, d0
+; CHECK-NEXT:    ret
+  %c = fptosi double %a to i32
+  %v = insertelement <4 x i32> poison, i32 %c, i32 0
+  ret <4 x i32> %v
+}
+
+define <1 x i64> @fcvtzs_v1i64_scalar_to_vector(half %a) {
+; CHECK-LABEL: fcvtzs_v1i64_scalar_to_vector:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
+  %c = fptosi half %a to i64
+  %v = insertelement <1 x i64> poison, i64 %c, i32 0
+  ret <1 x i64> %v
+}
+
+define <2 x i64> @fcvtzs_v2i64_scalar_to_vector(float %a) {
+; CHECK-LABEL: fcvtzs_v2i64_scalar_to_vector:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %c = fptosi float %a to i64
+  %v = insertelement <2 x i64> poison, i64 %c, i32 0
+  ret <2 x i64> %v
+}
diff --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll
index ef6b65cd50a1e..e6af0256fe6e2 100644
--- a/llvm/test/CodeGen/AArch64/sext.ll
+++ b/llvm/test/CodeGen/AArch64/sext.ll
@@ -1144,47 +1144,47 @@ entry:
 define <16 x i64> @sext_v16i10_v16i64(<16 x i10> %a) {
 ; CHECK-SD-LABEL: sext_v16i10_v16i64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    fmov s0, w2
-; CHECK-SD-NEXT:    fmov s1, w0
-; CHECK-SD-NEXT:    ldr s2, [sp]
-; CHECK-SD-NEXT:    fmov s3, w4
-; CHECK-SD-NEXT:    fmov s4, w6
+; CHECK-SD-NEXT:    fmov s0, w6
+; CHECK-SD-NEXT:    fmov s2, w4
+; CHECK-SD-NEXT:    ldr s1, [sp]
+; CHECK-SD-NEXT:    fmov s3, w2
+; CHECK-SD-NEXT:    fmov s4, w0
 ; CHECK-SD-NEXT:    add x8, sp, #8
 ; CHECK-SD-NEXT:    ldr s5, [sp, #16]
 ; CHECK-SD-NEXT:    ldr s6, [sp, #32]
 ; CHECK-SD-NEXT:    ldr s7, [sp, #48]
-; CHECK-SD-NEXT:    mov v1.s[1], w1
-; CHECK-SD-NEXT:    mov v0.s[1], w3
-; CHECK-SD-NEXT:    ld1 { v2.s }[1], [x8]
-; CHECK-SD-NEXT:    mov v3.s[1], w5
-; CHECK-SD-NEXT:    mov v4.s[1], w7
+; CHECK-SD-NEXT:    ld1 { v1.s }[1], [x8]
+; CHECK-SD-NEXT:    mov v2.s[1], w5
+; CHECK-SD-NEXT:    mov v0.s[1], w7
+; CHECK-SD-NEXT:    mov v4.s[1], w1
+; CHECK-SD-NEXT:    mov v3.s[1], w3
 ; CHECK-SD-NEXT:    add x8, sp, #24
 ; CHECK-SD-NEXT:    add x9, sp, #40
 ; CHECK-SD-NEXT:    add x10, sp, #56
 ; CHECK-SD-NEXT:    ld1 { v5.s }[1], [x8]
 ; CHECK-SD-NEXT:    ld1 { v6.s }[1], [x9]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[1], [x10]
-; CHECK-SD-NEXT:    ushll v2.2d, v2.2s, #0
 ; CHECK-SD-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-SD-NEXT:    ushll v2.2d, v2.2s, #0
 ; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-SD-NEXT:    ushll v3.2d, v3.2s, #0
 ; CHECK-SD-NEXT:    ushll v4.2d, v4.2s, #0
+; CHECK-SD-NEXT:    ushll v3.2d, v3.2s, #0
 ; CHECK-SD-NEXT:    ushll v5.2d, v5.2s, #0
 ; CHECK-SD-NEXT:    ushll v6.2d, v6.2s, #0
 ; CHECK-SD-NEXT:    ushll v7.2d, v7.2s, #0
-; CHECK-SD-NEXT:    shl v17.2d, v2.2d, #54
-; CHECK-SD-NEXT:    shl v1.2d, v1.2d, #54
-; CHECK-SD-NEXT:    shl v16.2d, v0.2d, #54
-; CHECK-SD-NEXT:    shl v3.2d, v3.2d, #54
+; CHECK-SD-NEXT:    shl v16.2d, v1.2d, #54
+; CHECK-SD-NEXT:    shl v2.2d, v2.2d, #54
+; CHECK-SD-NEXT:    shl v17.2d, v0.2d, #54
 ; CHECK-SD-NEXT:    shl v4.2d, v4.2d, #54
+; CHECK-SD-NEXT:    shl v3.2d, v3.2d, #54
 ; CHECK-SD-NEXT:    shl v5.2d, v5.2d, #54
 ; CHECK-SD-NEXT:    shl v6.2d, v6.2d, #54
 ; CHECK-SD-NEXT:    shl v7.2d, v7.2d, #54
-; CHECK-SD-NEXT:    sshr v0.2d, v1.2d, #54
-; CHECK-SD-NEXT:    sshr v1.2d, v16.2d, #54
-; CHECK-SD-NEXT:    sshr v2.2d, v3.2d, #54
-; CHECK-SD-NEXT:    sshr v3.2d, v4.2d, #54
-; CHECK-SD-NEXT:    sshr v4.2d, v17.2d, #54
+; CHECK-SD-NEXT:    sshr v2.2d, v2.2d, #54
+; CHECK-SD-NEXT:    sshr v0.2d, v4.2d, #54
+; CHECK-SD-NEXT:    sshr v1.2d, v3.2d, #54
+; CHECK-SD-NEXT:    sshr v4.2d, v16.2d, #54
+; CHECK-SD-NEXT:    sshr v3.2d, v17.2d, #54
 ; CHECK-SD-NEXT:    sshr v5.2d, v5.2d, #54
 ; CHECK-SD-NEXT:    sshr v6.2d, v6.2d, #54
 ; CHECK-SD-NEXT:    sshr v7.2d, v7.2d, #54
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
index a8b2c30bec562..b19767b0de550 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
@@ -57,28 +57,28 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind {
 ; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
 ; CHECK-NEXT:    frintx v0.4h, v0.4h
 ; CHECK-NEXT:    frintx v1.4h, v1.4h
-; CHECK-NEXT:    mov h4, v0.h[2]
-; CHECK-NEXT:    mov h2, v0.h[1]
-; CHECK-NEXT:    mov h7, v0.h[3]
+; CHECK-NEXT:    mov h3, v0.h[2]
+; CHECK-NEXT:    mov h4, v0.h[1]
+; CHECK-NEXT:    mov h5, v0.h[3]
 ; CHECK-NEXT:    fcvtzs x8, h0
-; CHECK-NEXT:    mov h3, v1.h[2]
-; CHECK-NEXT:    mov h5, v1.h[3]
-; CHECK-NEXT:    mov h6, v1.h[1]
-; CHECK-NEXT:    fcvtzs x11, h1
+; CHECK-NEXT:    mov h2, v1.h[2]
+; CHECK-NEXT:    mov h6, v1.h[3]
+; CHECK-NEXT:    mov h7, v1.h[1]
+; CHECK-NEXT:    fcvtzs x10, h1
+; CHECK-NEXT:    fcvtzs x11, h3
 ; CHECK-NEXT:    fcvtzs x12, h4
-; CHECK-NEXT:    fcvtzs x9, h2
-; CHECK-NEXT:    fcvtzs x15, h7
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    fcvtzs x10, h3
 ; CHECK-NEXT:    fcvtzs x13, h5
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    fcvtzs x9, h2
 ; CHECK-NEXT:    fcvtzs x14, h6
-; CHECK-NEXT:    fmov d1, x12
-; CHECK-NEXT:    fmov d2, x11
-; CHECK-NEXT:    mov v0.d[1], x9
-; CHECK-NEXT:    fmov d3, x10
-; CHECK-NEXT:    mov v1.d[1], x15
-; CHECK-NEXT:    mov v2.d[1], x14
-; CHECK-NEXT:    mov v3.d[1], x13
+; CHECK-NEXT:    fcvtzs x15, h7
+; CHECK-NEXT:    fmov d2, x10
+; CHECK-NEXT:    fmov d1, x11
+; CHECK-NEXT:    mov v0.d[1], x12
+; CHECK-NEXT:    fmov d3, x9
+; CHECK-NEXT:    mov v1.d[1], x13
+; CHECK-NEXT:    mov v2.d[1], x15
+; CHECK-NEXT:    mov v3.d[1], x14
 ; CHECK-NEXT:    ret
   %a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x)
   ret <8 x i64> %a
@@ -89,55 +89,55 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind {
 ; CHECK-LABEL: llrint_v16i64_v16f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    frintx v0.4h, v0.4h
 ; CHECK-NEXT:    frintx v1.4h, v1.4h
-; CHECK-NEXT:    frintx v3.4h, v0.4h
-; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECK-NEXT:    frintx v2.4h, v2.4h
+; CHECK-NEXT:    frintx v3.4h, v3.4h
+; CHECK-NEXT:    mov h5, v0.h[2]
 ; CHECK-NEXT:    mov h4, v1.h[2]
+; CHECK-NEXT:    mov h6, v0.h[1]
+; CHECK-NEXT:    fcvtzs x8, h1
+; CHECK-NEXT:    mov h16, v0.h[3]
+; CHECK-NEXT:    fcvtzs x9, h0
+; CHECK-NEXT:    mov h7, v1.h[1]
+; CHECK-NEXT:    mov h1, v1.h[3]
+; CHECK-NEXT:    mov h0, v2.h[3]
+; CHECK-NEXT:    mov h17, v2.h[2]
+; CHECK-NEXT:    fcvtzs x12, h5
 ; CHECK-NEXT:    mov h5, v3.h[2]
-; CHECK-NEXT:    frintx v0.4h, v0.4h
-; CHECK-NEXT:    mov h6, v3.h[1]
-; CHECK-NEXT:    fcvtzs x9, h3
-; CHECK-NEXT:    mov h16, v1.h[1]
-; CHECK-NEXT:    fcvtzs x12, h1
-; CHECK-NEXT:    mov h3, v3.h[3]
-; CHECK-NEXT:    mov h17, v1.h[3]
-; CHECK-NEXT:    mov h7, v2.h[3]
-; CHECK-NEXT:    fcvtzs x8, h4
-; CHECK-NEXT:    fcvtzs x10, h5
-; CHECK-NEXT:    mov h4, v2.h[2]
-; CHECK-NEXT:    mov h5, v0.h[2]
-; CHECK-NEXT:    fcvtzs x11, h6
-; CHECK-NEXT:    mov h6, v0.h[3]
-; CHECK-NEXT:    fcvtzs x15, h2
-; CHECK-NEXT:    mov h2, v2.h[1]
-; CHECK-NEXT:    fcvtzs x14, h0
-; CHECK-NEXT:    fcvtzs x17, h3
-; CHECK-NEXT:    fcvtzs x0, h17
-; CHECK-NEXT:    fcvtzs x13, h7
-; CHECK-NEXT:    mov h7, v0.h[1]
+; CHECK-NEXT:    fcvtzs x11, h2
+; CHECK-NEXT:    mov h18, v3.h[3]
+; CHECK-NEXT:    fcvtzs x14, h3
+; CHECK-NEXT:    mov h3, v3.h[1]
+; CHECK-NEXT:    mov h19, v2.h[1]
+; CHECK-NEXT:    fcvtzs x10, h4
+; CHECK-NEXT:    fmov d4, x8
+; CHECK-NEXT:    fcvtzs x13, h6
+; CHECK-NEXT:    fcvtzs x15, h0
+; CHECK-NEXT:    fcvtzs x8, h17
 ; CHECK-NEXT:    fmov d0, x9
-; CHECK-NEXT:    fcvtzs x16, h4
 ; CHECK-NEXT:    fcvtzs x9, h5
-; CHECK-NEXT:    fmov d4, x12
-; CHECK-NEXT:    fcvtzs x12, h16
-; CHECK-NEXT:    fmov d1, x10
-; CHECK-NEXT:    fcvtzs x10, h6
-; CHECK-NEXT:    fmov d5, x8
-; CHECK-NEXT:    fcvtzs x8, h2
+; CHECK-NEXT:    fcvtzs x16, h7
+; CHECK-NEXT:    fcvtzs x17, h16
+; CHECK-NEXT:    fmov d6, x11
+; CHECK-NEXT:    fcvtzs x11, h18
+; CHECK-NEXT:    fcvtzs x18, h3
 ; CHECK-NEXT:    fmov d2, x14
-; CHECK-NEXT:    fcvtzs x18, h7
-; CHECK-NEXT:    fmov d6, x15
-; CHECK-NEXT:    mov v0.d[1], x11
+; CHECK-NEXT:    fcvtzs x14, h19
+; CHECK-NEXT:    fcvtzs x0, h1
+; CHECK-NEXT:    fmov d5, x10
+; CHECK-NEXT:    fmov d1, x12
+; CHECK-NEXT:    fmov d7, x8
 ; CHECK-NEXT:    fmov d3, x9
-; CHECK-NEXT:    fmov d7, x16
+; CHECK-NEXT:    mov v0.d[1], x13
+; CHECK-NEXT:    mov v4.d[1], x16
+; CHECK-NEXT:    mov v2.d[1], x18
 ; CHECK-NEXT:    mov v1.d[1], x17
-; CHECK-NEXT:    mov v4.d[1], x12
 ; CHECK-NEXT:    mov v5.d[1], x0
-; CHECK-NEXT:    mov v6.d[1], x8
-; CHECK-NEXT:    mov v2.d[1], x18
-; CHECK-NEXT:    mov v3.d[1], x10
-; CHECK-NEXT:    mov v7.d[1], x13
+; CHECK-NEXT:    mov v6.d[1], x14
+; CHECK-NEXT:    mov v3.d[1], x11
+; CHECK-NEXT:    mov v7.d[1], x15
 ; CHECK-NEXT:    ret
   %a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x)
   ret <16 x i64> %a
@@ -324,27 +324,27 @@ declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
 define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) nounwind {
 ; CHECK-LABEL: llrint_v8i64_v8f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    frintx v0.4s, v0.4s
 ; CHECK-NEXT:    frintx v1.4s, v1.4s
-; CHECK-NEXT:    mov s3, v1.s[2]
-; CHECK-NEXT:    mov s4, v0.s[2]
-; CHECK-NEXT:    mov s2, v0.s[1]
+; CHECK-NEXT:    frintx v0.4s, v0.4s
+; CHECK-NEXT:    mov s2, v1.s[2]
+; CHECK-NEXT:    mov s3, v0.s[2]
+; CHECK-NEXT:    mov s4, v0.s[1]
 ; CHECK-NEXT:    mov s5, v1.s[3]
 ; CHECK-NEXT:    mov s6, v1.s[1]
 ; CHECK-NEXT:    mov s7, v0.s[3]
 ; CHECK-NEXT:    fcvtzs x8, s0
 ; CHECK-NEXT:    fcvtzs x10, s1
+; CHECK-NEXT:    fcvtzs x9, s2
 ; CHECK-NEXT:    fcvtzs x11, s3
 ; CHECK-NEXT:    fcvtzs x12, s4
-; CHECK-NEXT:    fcvtzs x9, s2
 ; CHECK-NEXT:    fcvtzs x13, s5
 ; CHECK-NEXT:    fcvtzs x14, s6
 ; CHECK-NEXT:    fcvtzs x15, s7
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    fmov d2, x10
-; CHECK-NEXT:    fmov d1, x12
-; CHECK-NEXT:    fmov d3, x11
-; CHECK-NEXT:    mov v0.d[1], x9
+; CHECK-NEXT:    fmov d3, x9
+; CHECK-NEXT:    fmov d1, x11
+; CHECK-NEXT:    mov v0.d[1], x12
 ; CHECK-NEXT:    mov v2.d[1], x14
 ; CHECK-NEXT:    mov v1.d[1], x15
 ; CHECK-NEXT:    mov v3.d[1], x13
@@ -363,48 +363,48 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) nounwind {
 ; CHECK-NEXT:    frintx v0.4s, v0.4s
 ; CHECK-NEXT:    mov s4, v3.s[2]
 ; CHECK-NEXT:    mov s5, v2.s[2]
-; CHECK-NEXT:    mov s6, v1.s[2]
-; CHECK-NEXT:    mov s7, v0.s[2]
-; CHECK-NEXT:    fcvtzs x10, s1
-; CHECK-NEXT:    fcvtzs x11, s0
-; CHECK-NEXT:    mov s16, v0.s[1]
-; CHECK-NEXT:    mov s17, v1.s[1]
-; CHECK-NEXT:    mov s18, v3.s[1]
-; CHECK-NEXT:    fcvtzs x14, s3
-; CHECK-NEXT:    fcvtzs x16, s2
-; CHECK-NEXT:    fcvtzs x8, s4
-; CHECK-NEXT:    mov s4, v2.s[1]
-; CHECK-NEXT:    fcvtzs x9, s5
-; CHECK-NEXT:    mov s5, v1.s[3]
-; CHECK-NEXT:    fcvtzs x12, s6
-; CHECK-NEXT:    mov s6, v0.s[3]
-; CHECK-NEXT:    fcvtzs x13, s7
-; CHECK-NEXT:    mov s7, v3.s[3]
-; CHECK-NEXT:    fmov d0, x11
-; CHECK-NEXT:    fcvtzs x17, s16
-; CHECK-NEXT:    fcvtzs x18, s18
-; CHECK-NEXT:    fcvtzs x15, s4
-; CHECK-NEXT:    mov s4, v2.s[3]
-; CHECK-NEXT:    fmov d2, x10
+; CHECK-NEXT:    mov s6, v2.s[1]
+; CHECK-NEXT:    mov s7, v1.s[2]
+; CHECK-NEXT:    fcvtzs x8, s3
+; CHECK-NEXT:    mov s16, v0.s[2]
+; CHECK-NEXT:    fcvtzs x9, s2
+; CHECK-NEXT:    mov s17, v1.s[3]
+; CHECK-NEXT:    mov s18, v0.s[1]
+; CHECK-NEXT:    mov s19, v3.s[3]
+; CHECK-NEXT:    fcvtzs x14, s1
+; CHECK-NEXT:    mov s1, v1.s[1]
+; CHECK-NEXT:    fcvtzs x10, s4
 ; CHECK-NEXT:    fcvtzs x11, s5
-; CHECK-NEXT:    fcvtzs x10, s6
-; CHECK-NEXT:    fmov d3, x12
-; CHECK-NEXT:    fmov d1, x13
-; CHECK-NEXT:    fcvtzs x12, s17
+; CHECK-NEXT:    mov s5, v0.s[3]
+; CHECK-NEXT:    mov s3, v3.s[1]
+; CHECK-NEXT:    mov s2, v2.s[3]
+; CHECK-NEXT:    fcvtzs x12, s6
 ; CHECK-NEXT:    fcvtzs x13, s7
-; CHECK-NEXT:    fmov d5, x9
-; CHECK-NEXT:    fmov d6, x14
-; CHECK-NEXT:    fmov d7, x8
-; CHECK-NEXT:    fcvtzs x0, s4
-; CHECK-NEXT:    fmov d4, x16
+; CHECK-NEXT:    fcvtzs x15, s16
+; CHECK-NEXT:    fmov d6, x8
+; CHECK-NEXT:    fcvtzs x8, s0
+; CHECK-NEXT:    fmov d4, x9
+; CHECK-NEXT:    fcvtzs x9, s17
+; CHECK-NEXT:    fcvtzs x16, s5
+; CHECK-NEXT:    fcvtzs x17, s18
+; CHECK-NEXT:    fmov d7, x10
+; CHECK-NEXT:    fmov d5, x11
+; CHECK-NEXT:    fcvtzs x10, s1
+; CHECK-NEXT:    fcvtzs x11, s19
+; CHECK-NEXT:    fcvtzs x18, s3
+; CHECK-NEXT:    fcvtzs x0, s2
+; CHECK-NEXT:    fmov d3, x13
+; CHECK-NEXT:    fmov d1, x15
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    fmov d2, x14
+; CHECK-NEXT:    mov v4.d[1], x12
+; CHECK-NEXT:    mov v3.d[1], x9
+; CHECK-NEXT:    mov v7.d[1], x11
 ; CHECK-NEXT:    mov v0.d[1], x17
-; CHECK-NEXT:    mov v1.d[1], x10
-; CHECK-NEXT:    mov v3.d[1], x11
-; CHECK-NEXT:    mov v2.d[1], x12
-; CHECK-NEXT:    mov v6.d[1], x18
-; CHECK-NEXT:    mov v7.d[1], x13
-; CHECK-NEXT:    mov v4.d[1], x15
+; CHECK-NEXT:    mov v1.d[1], x16
+; CHECK-NEXT:    mov v2.d[1], x10
 ; CHECK-NEXT:    mov v5.d[1], x0
+; CHECK-NEXT:    mov v6.d[1], x18
 ; CHECK-NEXT:    ret
   %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x)
   ret <16 x i64> %a
@@ -542,8 +542,7 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) nounwind {
 ; CHECK-LABEL: llrint_v1i64_v1f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    frintx d0, d0
-; CHECK-NEXT:    fcvtzs x8, d0
-; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    fcvtzs d0, d0
 ; CHECK-NEXT:    ret
   %a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x)
   ret <1 x i64> %a
@@ -570,17 +569,15 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) nounwind {
 ; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    frintx z0.d, p0/m, z0.d
-; CHECK-NEXT:    mov z1.d, z0.d[2]
-; CHECK-NEXT:    mov z2.d, z0.d[3]
+; CHECK-NEXT:    mov z1.d, z0.d[3]
+; CHECK-NEXT:    mov z2.d, z0.d[2]
 ; CHECK-NEXT:    mov z3.d, z0.d[1]
-; CHECK-NEXT:    fcvtzs x9, d0
+; CHECK-NEXT:    fcvtzs d0, d0
 ; CHECK-NEXT:    fcvtzs x8, d1
-; CHECK-NEXT:    fcvtzs x10, d2
-; CHECK-NEXT:    fcvtzs x11, d3
-; CHECK-NEXT:    fmov d0, x9
-; CHECK-NEXT:    fmov d1, x8
-; CHECK-NEXT:    mov v0.d[1], x11
-; CHECK-NEXT:    mov v1.d[1], x10
+; CHECK-NEXT:    fcvtzs d1, d2
+; CHECK-NEXT:    fcvtzs x9, d3
+; CHECK-NEXT:    mov v0.d[1], x9
+; CHECK-NEXT:    mov v1.d[1], x8
 ; CHECK-NEXT:    ret
   %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x)
   ret <4 x i64> %a
@@ -598,31 +595,27 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) nounwind {
 ; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-NEXT:    splice z2.d, p0, z2.d, z3.d
 ; CHECK-NEXT:    ptrue p0.d, vl4
-; CHECK-NEXT:    frintx z0.d, p0/m, z0.d
 ; CHECK-NEXT:    movprfx z1, z2
 ; CHECK-NEXT:    frintx z1.d, p0/m, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d[2]
-; CHECK-NEXT:    mov z5.d, z0.d[2]
-; CHECK-NEXT:    mov z2.d, z0.d[1]
-; CHECK-NEXT:    mov z3.d, z1.d[3]
-; CHECK-NEXT:    mov z6.d, z0.d[3]
-; CHECK-NEXT:    fcvtzs x8, d0
-; CHECK-NEXT:    mov z0.d, z1.d[1]
-; CHECK-NEXT:    fcvtzs x10, d1
-; CHECK-NEXT:    fcvtzs x11, d4
-; CHECK-NEXT:    fcvtzs x12, d5
-; CHECK-NEXT:    fcvtzs x9, d2
-; CHECK-NEXT:    fcvtzs x13, d3
-; CHECK-NEXT:    fcvtzs x14, d6
-; CHECK-NEXT:    fcvtzs x15, d0
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    fmov d2, x10
-; CHECK-NEXT:    fmov d1, x12
-; CHECK-NEXT:    fmov d3, x11
-; CHECK-NEXT:    mov v0.d[1], x9
-; CHECK-NEXT:    mov v2.d[1], x15
-; CHECK-NEXT:    mov v1.d[1], x14
-; CHECK-NEXT:    mov v3.d[1], x13
+; CHECK-NEXT:    frintx z0.d, p0/m, z0.d
+; CHECK-NEXT:    mov z2.d, z1.d[3]
+; CHECK-NEXT:    mov z3.d, z0.d[3]
+; CHECK-NEXT:    mov z4.d, z0.d[1]
+; CHECK-NEXT:    mov z5.d, z1.d[2]
+; CHECK-NEXT:    mov z6.d, z0.d[2]
+; CHECK-NEXT:    mov z7.d, z1.d[1]
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    fcvtzs x8, d2
+; CHECK-NEXT:    fcvtzs x9, d3
+; CHECK-NEXT:    fcvtzs x10, d4
+; CHECK-NEXT:    fcvtzs d2, d1
+; CHECK-NEXT:    fcvtzs d3, d5
+; CHECK-NEXT:    fcvtzs d1, d6
+; CHECK-NEXT:    fcvtzs x11, d7
+; CHECK-NEXT:    mov v0.d[1], x10
+; CHECK-NEXT:    mov v1.d[1], x9
+; CHECK-NEXT:    mov v3.d[1], x8
+; CHECK-NEXT:    mov v2.d[1], x11
 ; CHECK-NEXT:    ret
   %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x)
   ret <8 x i64> %a
@@ -632,70 +625,60 @@ declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
 define <16 x i64> @llrint_v16f64(<16 x double> %x) nounwind {
 ; CHECK-LABEL: llrint_v16f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d, vl2
+; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    // kill: def $q6 killed $q6 def $z6
 ; CHECK-NEXT:    // kill: def $q4 killed $q4 def $z4
+; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
 ; CHECK-NEXT:    // kill: def $q7 killed $q7 def $z7
 ; CHECK-NEXT:    // kill: def $q5 killed $q5 def $z5
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    // kill: def $q3 killed $q3 def $z3
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    splice z6.d, p0, z6.d, z7.d
+; CHECK-NEXT:    splice z2.d, p0, z2.d, z3.d
+; CHECK-NEXT:    splice z4.d, p0, z4.d, z5.d
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-NEXT:    ptrue p0.d, vl4
-; CHECK-NEXT:    splice z6.d, p1, z6.d, z7.d
-; CHECK-NEXT:    splice z4.d, p1, z4.d, z5.d
-; CHECK-NEXT:    splice z2.d, p1, z2.d, z3.d
-; CHECK-NEXT:    splice z0.d, p1, z0.d, z1.d
-; CHECK-NEXT:    movprfx z3, z6
-; CHECK-NEXT:    frintx z3.d, p0/m, z6.d
-; CHECK-NEXT:    movprfx z1, z4
-; CHECK-NEXT:    frintx z1.d, p0/m, z4.d
+; CHECK-NEXT:    frintx z6.d, p0/m, z6.d
+; CHECK-NEXT:    frintx z4.d, p0/m, z4.d
 ; CHECK-NEXT:    frintx z2.d, p0/m, z2.d
 ; CHECK-NEXT:    frintx z0.d, p0/m, z0.d
-; CHECK-NEXT:    mov z4.d, z3.d[2]
-; CHECK-NEXT:    mov z5.d, z1.d[2]
-; CHECK-NEXT:    mov z6.d, z2.d[3]
-; CHECK-NEXT:    fcvtzs x11, d0
-; CHECK-NEXT:    fcvtzs x12, d1
-; CHECK-NEXT:    fcvtzs x13, d2
-; CHECK-NEXT:    fcvtzs x14, d3
-; CHECK-NEXT:    mov z7.d, z3.d[3]
-; CHECK-NEXT:    mov z16.d, z1.d[3]
-; CHECK-NEXT:    fcvtzs x9, d4
+; CHECK-NEXT:    mov z1.d, z6.d[3]
+; CHECK-NEXT:    mov z3.d, z4.d[3]
+; CHECK-NEXT:    mov z5.d, z2.d[3]
+; CHECK-NEXT:    mov z16.d, z4.d[1]
+; CHECK-NEXT:    mov z7.d, z0.d[3]
+; CHECK-NEXT:    mov z17.d, z0.d[2]
+; CHECK-NEXT:    mov z18.d, z4.d[2]
+; CHECK-NEXT:    mov z19.d, z6.d[1]
+; CHECK-NEXT:    fcvtzs d4, d4
+; CHECK-NEXT:    fcvtzs x8, d1
+; CHECK-NEXT:    mov z1.d, z2.d[1]
+; CHECK-NEXT:    fcvtzs x9, d3
+; CHECK-NEXT:    mov z3.d, z0.d[1]
 ; CHECK-NEXT:    fcvtzs x10, d5
-; CHECK-NEXT:    mov z4.d, z2.d[2]
-; CHECK-NEXT:    mov z5.d, z0.d[2]
-; CHECK-NEXT:    fcvtzs x8, d6
-; CHECK-NEXT:    mov z2.d, z2.d[1]
-; CHECK-NEXT:    mov z6.d, z0.d[3]
-; CHECK-NEXT:    mov z1.d, z1.d[1]
-; CHECK-NEXT:    mov z3.d, z3.d[1]
-; CHECK-NEXT:    fcvtzs x15, d4
-; CHECK-NEXT:    mov z4.d, z0.d[1]
-; CHECK-NEXT:    fmov d0, x11
-; CHECK-NEXT:    fcvtzs x16, d5
-; CHECK-NEXT:    fcvtzs x11, d2
-; CHECK-NEXT:    fmov d2, x13
-; CHECK-NEXT:    fcvtzs x17, d7
-; CHECK-NEXT:    fcvtzs x18, d16
-; CHECK-NEXT:    fcvtzs x0, d3
-; CHECK-NEXT:    fcvtzs x13, d4
-; CHECK-NEXT:    fmov d4, x12
-; CHECK-NEXT:    fcvtzs x12, d6
-; CHECK-NEXT:    fmov d6, x14
-; CHECK-NEXT:    fcvtzs x14, d1
-; CHECK-NEXT:    fmov d3, x15
-; CHECK-NEXT:    fmov d1, x16
-; CHECK-NEXT:    fmov d5, x10
-; CHECK-NEXT:    fmov d7, x9
-; CHECK-NEXT:    mov v2.d[1], x11
-; CHECK-NEXT:    mov v0.d[1], x13
-; CHECK-NEXT:    mov v3.d[1], x8
-; CHECK-NEXT:    mov v6.d[1], x0
-; CHECK-NEXT:    mov v4.d[1], x14
-; CHECK-NEXT:    mov v1.d[1], x12
-; CHECK-NEXT:    mov v5.d[1], x18
-; CHECK-NEXT:    mov v7.d[1], x17
+; CHECK-NEXT:    mov z5.d, z6.d[2]
+; CHECK-NEXT:    fcvtzs x12, d16
+; CHECK-NEXT:    mov z16.d, z2.d[2]
+; CHECK-NEXT:    fcvtzs x11, d7
+; CHECK-NEXT:    fcvtzs x13, d1
+; CHECK-NEXT:    fcvtzs d1, d17
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    fcvtzs x14, d3
+; CHECK-NEXT:    fcvtzs d7, d5
+; CHECK-NEXT:    fcvtzs d2, d2
+; CHECK-NEXT:    fcvtzs d3, d16
+; CHECK-NEXT:    fcvtzs d5, d18
+; CHECK-NEXT:    fcvtzs x15, d19
+; CHECK-NEXT:    fcvtzs d6, d6
+; CHECK-NEXT:    mov v4.d[1], x12
+; CHECK-NEXT:    mov v1.d[1], x11
+; CHECK-NEXT:    mov v0.d[1], x14
+; CHECK-NEXT:    mov v2.d[1], x13
+; CHECK-NEXT:    mov v7.d[1], x8
+; CHECK-NEXT:    mov v3.d[1], x10
+; CHECK-NEXT:    mov v5.d[1], x9
+; CHECK-NEXT:    mov v6.d[1], x15
 ; CHECK-NEXT:    ret
   %a = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> %x)
   ret <16 x i64> %a
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
index 465ba38b17874..edf1027633906 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
@@ -97,17 +97,17 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) nounwind {
 ; CHECK-i32-NEXT:    mov h3, v2.h[7]
 ; CHECK-i32-NEXT:    fcvtzs w12, h4
 ; CHECK-i32-NEXT:    mov h2, v2.h[3]
-; CHECK-i32-NEXT:    fcvtzs w13, h0
-; CHECK-i32-NEXT:    fmov s0, w9
 ; CHECK-i32-NEXT:    fmov s1, w8
-; CHECK-i32-NEXT:    fcvtzs w8, h3
-; CHECK-i32-NEXT:    fcvtzs w9, h2
+; CHECK-i32-NEXT:    fcvtzs w8, h0
+; CHECK-i32-NEXT:    fmov s0, w9
+; CHECK-i32-NEXT:    fcvtzs w9, h3
 ; CHECK-i32-NEXT:    mov v0.s[1], w11
 ; CHECK-i32-NEXT:    mov v1.s[1], w10
-; CHECK-i32-NEXT:    mov v0.s[2], w13
+; CHECK-i32-NEXT:    fcvtzs w10, h2
+; CHECK-i32-NEXT:    mov v0.s[2], w8
 ; CHECK-i32-NEXT:    mov v1.s[2], w12
-; CHECK-i32-NEXT:    mov v0.s[3], w9
-; CHECK-i32-NEXT:    mov v1.s[3], w8
+; CHECK-i32-NEXT:    mov v0.s[3], w10
+; CHECK-i32-NEXT:    mov v1.s[3], w9
 ; CHECK-i32-NEXT:    ret
 ;
 ; CHECK-i64-LABEL: lrint_v8f16:
@@ -115,28 +115,28 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) nounwind {
 ; CHECK-i64-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
 ; CHECK-i64-NEXT:    frintx v0.4h, v0.4h
 ; CHECK-i64-NEXT:    frintx v1.4h, v1.4h
-; CHECK-i64-NEXT:    mov h4, v0.h[2]
-; CHECK-i64-NEXT:    mov h2, v0.h[1]
-; CHECK-i64-NEXT:    mov h7, v0.h[3]
+; CHECK-i64-NEXT:    mov h3, v0.h[2]
+; CHECK-i64-NEXT:    mov h4, v0.h[1]
+; CHECK-i64-NEXT:    mov h5, v0.h[3]
 ; CHECK-i64-NEXT:    fcvtzs x8, h0
-; CHECK-i64-NEXT:    mov h3, v1.h[2]
-; CHECK-i64-NEXT:    mov h5, v1.h[3]
-; CHECK-i64-NEXT:    mov h6, v1.h[1]
-; CHECK-i64-NEXT:    fcvtzs x11, h1
+; CHECK-i64-NEXT:    mov h2, v1.h[2]
+; CHECK-i64-NEXT:    mov h6, v1.h[3]
+; CHECK-i64-NEXT:    mov h7, v1.h[1]
+; CHECK-i64-NEXT:    fcvtzs x10, h1
+; CHECK-i64-NEXT:    fcvtzs x11, h3
 ; CHECK-i64-NEXT:    fcvtzs x12, h4
-; CHECK-i64-NEXT:    fcvtzs x9, h2
-; CHECK-i64-NEXT:    fcvtzs x15, h7
-; CHECK-i64-NEXT:    fmov d0, x8
-; CHECK-i64-NEXT:    fcvtzs x10, h3
 ; CHECK-i64-NEXT:    fcvtzs x13, h5
+; CHECK-i64-NEXT:    fmov d0, x8
+; CHECK-i64-NEXT:    fcvtzs x9, h2
 ; CHECK-i64-NEXT:    fcvtzs x14, h6
-; CHECK-i64-NEXT:    fmov d1, x12
-; CHECK-i64-NEXT:    fmov d2, x11
-; CHECK-i64-NEXT:    mov v0.d[1], x9
-; CHECK-i64-NEXT:    fmov d3, x10
-; CHECK-i64-NEXT:    mov v1.d[1], x15
-; CHECK-i64-NEXT:    mov v2.d[1], x14
-; CHECK-i64-NEXT:    mov v3.d[1], x13
+; CHECK-i64-NEXT:    fcvtzs x15, h7
+; CHECK-i64-NEXT:    fmov d2, x10
+; CHECK-i64-NEXT:    fmov d1, x11
+; CHECK-i64-NEXT:    mov v0.d[1], x12
+; CHECK-i64-NEXT:    fmov d3, x9
+; CHECK-i64-NEXT:    mov v1.d[1], x13
+; CHECK-i64-NEXT:    mov v2.d[1], x15
+; CHECK-i64-NEXT:    mov v3.d[1], x14
 ; CHECK-i64-NEXT:    ret
   %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x)
   ret <8 x iXLen> %a
@@ -147,107 +147,107 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v16f16:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx v1.8h, v1.8h
-; CHECK-i32-NEXT:    frintx v0.8h, v0.8h
-; CHECK-i32-NEXT:    mov h3, v1.h[4]
+; CHECK-i32-NEXT:    frintx v4.8h, v0.8h
+; CHECK-i32-NEXT:    mov h0, v1.h[6]
 ; CHECK-i32-NEXT:    mov h2, v1.h[5]
-; CHECK-i32-NEXT:    mov h5, v0.h[4]
-; CHECK-i32-NEXT:    mov h4, v1.h[1]
-; CHECK-i32-NEXT:    mov h6, v0.h[1]
-; CHECK-i32-NEXT:    fcvtzs w11, h0
-; CHECK-i32-NEXT:    fcvtzs w14, h1
-; CHECK-i32-NEXT:    mov h7, v1.h[6]
+; CHECK-i32-NEXT:    mov h3, v1.h[4]
+; CHECK-i32-NEXT:    mov h5, v4.h[4]
+; CHECK-i32-NEXT:    mov h7, v4.h[1]
+; CHECK-i32-NEXT:    fcvtzs w10, h1
+; CHECK-i32-NEXT:    fcvtzs w13, h4
+; CHECK-i32-NEXT:    mov h6, v1.h[2]
 ; CHECK-i32-NEXT:    mov h16, v1.h[3]
-; CHECK-i32-NEXT:    mov h17, v0.h[7]
-; CHECK-i32-NEXT:    mov h18, v0.h[3]
-; CHECK-i32-NEXT:    fcvtzs w9, h3
-; CHECK-i32-NEXT:    mov h3, v0.h[5]
-; CHECK-i32-NEXT:    fcvtzs w8, h2
-; CHECK-i32-NEXT:    mov h2, v1.h[2]
+; CHECK-i32-NEXT:    mov h17, v4.h[7]
+; CHECK-i32-NEXT:    fcvtzs w8, h0
+; CHECK-i32-NEXT:    mov h0, v1.h[1]
+; CHECK-i32-NEXT:    fcvtzs w9, h2
+; CHECK-i32-NEXT:    mov h2, v4.h[5]
+; CHECK-i32-NEXT:    fcvtzs w11, h3
+; CHECK-i32-NEXT:    mov h3, v4.h[6]
 ; CHECK-i32-NEXT:    fcvtzs w12, h5
-; CHECK-i32-NEXT:    fcvtzs w10, h4
-; CHECK-i32-NEXT:    mov h4, v0.h[6]
-; CHECK-i32-NEXT:    mov h5, v0.h[2]
-; CHECK-i32-NEXT:    fcvtzs w13, h6
-; CHECK-i32-NEXT:    mov h6, v1.h[7]
-; CHECK-i32-NEXT:    fmov s0, w11
-; CHECK-i32-NEXT:    fcvtzs w16, h7
-; CHECK-i32-NEXT:    fcvtzs w15, h3
-; CHECK-i32-NEXT:    fmov s3, w9
-; CHECK-i32-NEXT:    fcvtzs w9, h16
-; CHECK-i32-NEXT:    fcvtzs w17, h2
+; CHECK-i32-NEXT:    mov h5, v4.h[2]
+; CHECK-i32-NEXT:    fcvtzs w14, h7
+; CHECK-i32-NEXT:    mov h7, v1.h[7]
+; CHECK-i32-NEXT:    fcvtzs w17, h6
+; CHECK-i32-NEXT:    mov h4, v4.h[3]
+; CHECK-i32-NEXT:    fcvtzs w15, h0
+; CHECK-i32-NEXT:    fmov s0, w13
+; CHECK-i32-NEXT:    fcvtzs w16, h2
+; CHECK-i32-NEXT:    fmov s2, w10
+; CHECK-i32-NEXT:    fcvtzs w10, h3
+; CHECK-i32-NEXT:    fmov s3, w11
 ; CHECK-i32-NEXT:    fmov s1, w12
-; CHECK-i32-NEXT:    fmov s2, w14
-; CHECK-i32-NEXT:    fcvtzs w11, h4
 ; CHECK-i32-NEXT:    fcvtzs w18, h5
-; CHECK-i32-NEXT:    mov v0.s[1], w13
-; CHECK-i32-NEXT:    mov v3.s[1], w8
-; CHECK-i32-NEXT:    fcvtzs w8, h6
-; CHECK-i32-NEXT:    fcvtzs w12, h18
-; CHECK-i32-NEXT:    mov v1.s[1], w15
-; CHECK-i32-NEXT:    mov v2.s[1], w10
-; CHECK-i32-NEXT:    fcvtzs w10, h17
+; CHECK-i32-NEXT:    mov v0.s[1], w14
+; CHECK-i32-NEXT:    fcvtzs w11, h16
+; CHECK-i32-NEXT:    fcvtzs w12, h17
+; CHECK-i32-NEXT:    mov v2.s[1], w15
+; CHECK-i32-NEXT:    fcvtzs w13, h4
+; CHECK-i32-NEXT:    mov v1.s[1], w16
+; CHECK-i32-NEXT:    mov v3.s[1], w9
+; CHECK-i32-NEXT:    fcvtzs w9, h7
 ; CHECK-i32-NEXT:    mov v0.s[2], w18
-; CHECK-i32-NEXT:    mov v3.s[2], w16
-; CHECK-i32-NEXT:    mov v1.s[2], w11
 ; CHECK-i32-NEXT:    mov v2.s[2], w17
-; CHECK-i32-NEXT:    mov v0.s[3], w12
-; CHECK-i32-NEXT:    mov v3.s[3], w8
-; CHECK-i32-NEXT:    mov v1.s[3], w10
-; CHECK-i32-NEXT:    mov v2.s[3], w9
+; CHECK-i32-NEXT:    mov v1.s[2], w10
+; CHECK-i32-NEXT:    mov v3.s[2], w8
+; CHECK-i32-NEXT:    mov v0.s[3], w13
+; CHECK-i32-NEXT:    mov v2.s[3], w11
+; CHECK-i32-NEXT:    mov v1.s[3], w12
+; CHECK-i32-NEXT:    mov v3.s[3], w9
 ; CHECK-i32-NEXT:    ret
 ;
 ; CHECK-i64-LABEL: lrint_v16f16:
 ; CHECK-i64:       // %bb.0:
 ; CHECK-i64-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-i64-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-i64-NEXT:    frintx v0.4h, v0.4h
 ; CHECK-i64-NEXT:    frintx v1.4h, v1.4h
-; CHECK-i64-NEXT:    frintx v3.4h, v0.4h
-; CHECK-i64-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECK-i64-NEXT:    frintx v2.4h, v2.4h
+; CHECK-i64-NEXT:    frintx v3.4h, v3.4h
+; CHECK-i64-NEXT:    mov h5, v0.h[2]
 ; CHECK-i64-NEXT:    mov h4, v1.h[2]
+; CHECK-i64-NEXT:    mov h6, v0.h[1]
+; CHECK-i64-NEXT:    fcvtzs x8, h1
+; CHECK-i64-NEXT:    mov h16, v0.h[3]
+; CHECK-i64-NEXT:    fcvtzs x9, h0
+; CHECK-i64-NEXT:    mov h7, v1.h[1]
+; CHECK-i64-NEXT:    mov h1, v1.h[3]
+; CHECK-i64-NEXT:    mov h0, v2.h[3]
+; CHECK-i64-NEXT:    mov h17, v2.h[2]
+; CHECK-i64-NEXT:    fcvtzs x12, h5
 ; CHECK-i64-NEXT:    mov h5, v3.h[2]
-; CHECK-i64-NEXT:    frintx v0.4h, v0.4h
-; CHECK-i64-NEXT:    mov h6, v3.h[1]
-; CHECK-i64-NEXT:    fcvtzs x9, h3
-; CHECK-i64-NEXT:    mov h16, v1.h[1]
-; CHECK-i64-NEXT:    fcvtzs x12, h1
-; CHECK-i64-NEXT:    mov h3, v3.h[3]
-; CHECK-i64-NEXT:    mov h17, v1.h[3]
-; CHECK-i64-NEXT:    mov h7, v2.h[3]
-; CHECK-i64-NEXT:    fcvtzs x8, h4
-; CHECK-i64-NEXT:    fcvtzs x10, h5
-; CHECK-i64-NEXT:    mov h4, v2.h[2]
-; CHECK-i64-NEXT:    mov h5, v0.h[2]
-; CHECK-i64-NEXT:    fcvtzs x11, h6
-; CHECK-i64-NEXT:    mov h6, v0.h[3]
-; CHECK-i64-NEXT:    fcvtzs x15, h2
-; CHECK-i64-NEXT:    mov h2, v2.h[1]
-; CHECK-i64-NEXT:    fcvtzs x14, h0
-; CHECK-i64-NEXT:    fcvtzs x17, h3
-; CHECK-i64-NEXT:    fcvtzs x0, h17
-; CHECK-i64-NEXT:    fcvtzs x13, h7
-; CHECK-i64-NEXT:    mov h7, v0.h[1]
+; CHECK-i64-NEXT:    fcvtzs x11, h2
+; CHECK-i64-NEXT:    mov h18, v3.h[3]
+; CHECK-i64-NEXT:    fcvtzs x14, h3
+; CHECK-i64-NEXT:    mov h3, v3.h[1]
+; CHECK-i64-NEXT:    mov h19, v2.h[1]
+; CHECK-i64-NEXT:    fcvtzs x10, h4
+; CHECK-i64-NEXT:    fmov d4, x8
+; CHECK-i64-NEXT:    fcvtzs x13, h6
+; CHECK-i64-NEXT:    fcvtzs x15, h0
+; CHECK-i64-NEXT:    fcvtzs x8, h17
 ; CHECK-i64-NEXT:    fmov d0, x9
-; CHECK-i64-NEXT:    fcvtzs x16, h4
 ; CHECK-i64-NEXT:    fcvtzs x9, h5
-; CHECK-i64-NEXT:    fmov d4, x12
-; CHECK-i64-NEXT:    fcvtzs x12, h16
-; CHECK-i64-NEXT:    fmov d1, x10
-; CHECK-i64-NEXT:    fcvtzs x10, h6
-; CHECK-i64-NEXT:    fmov d5, x8
-; CHECK-i64-NEXT:    fcvtzs x8, h2
+; CHECK-i64-NEXT:    fcvtzs x16, h7
+; CHECK-i64-NEXT:    fcvtzs x17, h16
+; CHECK-i64-NEXT:    fmov d6, x11
+; CHECK-i64-NEXT:    fcvtzs x11, h18
+; CHECK-i64-NEXT:    fcvtzs x18, h3
 ; CHECK-i64-NEXT:    fmov d2, x14
-; CHECK-i64-NEXT:    fcvtzs x18, h7
-; CHECK-i64-NEXT:    fmov d6, x15
-; CHECK-i64-NEXT:    mov v0.d[1], x11
+; CHECK-i64-NEXT:    fcvtzs x14, h19
+; CHECK-i64-NEXT:    fcvtzs x0, h1
+; CHECK-i64-NEXT:    fmov d5, x10
+; CHECK-i64-NEXT:    fmov d1, x12
+; CHECK-i64-NEXT:    fmov d7, x8
 ; CHECK-i64-NEXT:    fmov d3, x9
-; CHECK-i64-NEXT:    fmov d7, x16
+; CHECK-i64-NEXT:    mov v0.d[1], x13
+; CHECK-i64-NEXT:    mov v4.d[1], x16
+; CHECK-i64-NEXT:    mov v2.d[1], x18
 ; CHECK-i64-NEXT:    mov v1.d[1], x17
-; CHECK-i64-NEXT:    mov v4.d[1], x12
 ; CHECK-i64-NEXT:    mov v5.d[1], x0
-; CHECK-i64-NEXT:    mov v6.d[1], x8
-; CHECK-i64-NEXT:    mov v2.d[1], x18
-; CHECK-i64-NEXT:    mov v3.d[1], x10
-; CHECK-i64-NEXT:    mov v7.d[1], x13
+; CHECK-i64-NEXT:    mov v6.d[1], x14
+; CHECK-i64-NEXT:    mov v3.d[1], x11
+; CHECK-i64-NEXT:    mov v7.d[1], x15
 ; CHECK-i64-NEXT:    ret
   %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x)
   ret <16 x iXLen> %a
@@ -257,110 +257,104 @@ declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>)
 define <32 x iXLen> @lrint_v32f16(<32 x half> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v32f16:
 ; CHECK-i32:       // %bb.0:
-; CHECK-i32-NEXT:    stp x26, x25, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-i32-NEXT:    stp x20, x19, [sp, #-16]! // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    frintx v3.8h, v3.8h
 ; CHECK-i32-NEXT:    frintx v2.8h, v2.8h
-; CHECK-i32-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    frintx v1.8h, v1.8h
-; CHECK-i32-NEXT:    frintx v0.8h, v0.8h
-; CHECK-i32-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    mov h16, v3.h[3]
+; CHECK-i32-NEXT:    mov h17, v3.h[2]
 ; CHECK-i32-NEXT:    mov h4, v3.h[7]
 ; CHECK-i32-NEXT:    mov h5, v3.h[6]
 ; CHECK-i32-NEXT:    mov h6, v3.h[5]
 ; CHECK-i32-NEXT:    mov h7, v3.h[4]
-; CHECK-i32-NEXT:    mov h16, v3.h[3]
-; CHECK-i32-NEXT:    mov h17, v3.h[2]
 ; CHECK-i32-NEXT:    mov h18, v3.h[1]
-; CHECK-i32-NEXT:    mov h19, v2.h[7]
-; CHECK-i32-NEXT:    fcvtzs w1, h3
-; CHECK-i32-NEXT:    mov h3, v1.h[6]
-; CHECK-i32-NEXT:    fcvtzs w7, h2
-; CHECK-i32-NEXT:    fcvtzs w22, h0
+; CHECK-i32-NEXT:    fcvtzs w13, h3
+; CHECK-i32-NEXT:    mov h3, v2.h[7]
+; CHECK-i32-NEXT:    mov h19, v2.h[4]
+; CHECK-i32-NEXT:    fcvtzs w18, h2
+; CHECK-i32-NEXT:    mov h20, v2.h[3]
+; CHECK-i32-NEXT:    fcvtzs w9, h16
+; CHECK-i32-NEXT:    fcvtzs w11, h17
+; CHECK-i32-NEXT:    mov h16, v2.h[1]
+; CHECK-i32-NEXT:    frintx v17.8h, v0.8h
 ; CHECK-i32-NEXT:    fcvtzs w8, h4
 ; CHECK-i32-NEXT:    mov h4, v2.h[6]
+; CHECK-i32-NEXT:    mov h0, v1.h[6]
 ; CHECK-i32-NEXT:    fcvtzs w10, h5
 ; CHECK-i32-NEXT:    mov h5, v2.h[5]
+; CHECK-i32-NEXT:    mov h21, v2.h[2]
+; CHECK-i32-NEXT:    mov h2, v1.h[4]
+; CHECK-i32-NEXT:    fcvtzs w15, h7
+; CHECK-i32-NEXT:    fcvtzs w1, h16
 ; CHECK-i32-NEXT:    fcvtzs w12, h6
-; CHECK-i32-NEXT:    mov h6, v2.h[4]
-; CHECK-i32-NEXT:    fcvtzs w13, h7
-; CHECK-i32-NEXT:    mov h7, v2.h[3]
-; CHECK-i32-NEXT:    fcvtzs w9, h16
-; CHECK-i32-NEXT:    fcvtzs w11, h17
-; CHECK-i32-NEXT:    mov h16, v2.h[2]
-; CHECK-i32-NEXT:    mov h17, v2.h[1]
-; CHECK-i32-NEXT:    fcvtzs w17, h4
-; CHECK-i32-NEXT:    mov h4, v1.h[5]
-; CHECK-i32-NEXT:    mov h2, v0.h[5]
+; CHECK-i32-NEXT:    fcvtzs w17, h19
+; CHECK-i32-NEXT:    mov h16, v17.h[4]
+; CHECK-i32-NEXT:    fcvtzs w14, h18
+; CHECK-i32-NEXT:    fmov s6, w13
+; CHECK-i32-NEXT:    fcvtzs w13, h3
+; CHECK-i32-NEXT:    fcvtzs w16, h4
+; CHECK-i32-NEXT:    mov h3, v1.h[5]
+; CHECK-i32-NEXT:    mov h18, v17.h[5]
+; CHECK-i32-NEXT:    fmov s4, w18
+; CHECK-i32-NEXT:    fcvtzs w18, h0
+; CHECK-i32-NEXT:    mov h0, v17.h[1]
+; CHECK-i32-NEXT:    mov h19, v1.h[1]
+; CHECK-i32-NEXT:    fcvtzs w2, h2
+; CHECK-i32-NEXT:    mov h2, v1.h[2]
+; CHECK-i32-NEXT:    fcvtzs w4, h1
+; CHECK-i32-NEXT:    fcvtzs w6, h16
+; CHECK-i32-NEXT:    fcvtzs w7, h17
+; CHECK-i32-NEXT:    fmov s7, w15
 ; CHECK-i32-NEXT:    fcvtzs w0, h5
-; CHECK-i32-NEXT:    fcvtzs w3, h6
-; CHECK-i32-NEXT:    mov h5, v1.h[4]
-; CHECK-i32-NEXT:    mov h6, v0.h[4]
-; CHECK-i32-NEXT:    fcvtzs w16, h7
-; CHECK-i32-NEXT:    mov h7, v0.h[1]
-; CHECK-i32-NEXT:    fcvtzs w15, h18
-; CHECK-i32-NEXT:    fcvtzs w2, h3
-; CHECK-i32-NEXT:    mov h3, v1.h[2]
-; CHECK-i32-NEXT:    fcvtzs w19, h4
-; CHECK-i32-NEXT:    mov h4, v1.h[1]
-; CHECK-i32-NEXT:    mov h18, v0.h[6]
-; CHECK-i32-NEXT:    fcvtzs w20, h5
-; CHECK-i32-NEXT:    fcvtzs w23, h2
-; CHECK-i32-NEXT:    mov h2, v0.h[2]
-; CHECK-i32-NEXT:    fcvtzs w21, h6
-; CHECK-i32-NEXT:    fcvtzs w25, h1
-; CHECK-i32-NEXT:    fcvtzs w4, h17
-; CHECK-i32-NEXT:    fcvtzs w24, h7
-; CHECK-i32-NEXT:    fcvtzs w14, h19
-; CHECK-i32-NEXT:    fcvtzs w18, h16
-; CHECK-i32-NEXT:    fcvtzs w26, h4
+; CHECK-i32-NEXT:    fcvtzs w15, h20
+; CHECK-i32-NEXT:    fcvtzs w3, h3
+; CHECK-i32-NEXT:    mov h20, v17.h[6]
+; CHECK-i32-NEXT:    fcvtzs w5, h18
+; CHECK-i32-NEXT:    mov h18, v17.h[2]
+; CHECK-i32-NEXT:    fcvtzs w19, h0
+; CHECK-i32-NEXT:    fcvtzs w20, h19
+; CHECK-i32-NEXT:    fmov s5, w17
+; CHECK-i32-NEXT:    fcvtzs w17, h21
 ; CHECK-i32-NEXT:    mov h16, v1.h[7]
-; CHECK-i32-NEXT:    mov h17, v1.h[3]
-; CHECK-i32-NEXT:    fcvtzs w5, h3
-; CHECK-i32-NEXT:    mov h19, v0.h[7]
+; CHECK-i32-NEXT:    fmov s3, w2
+; CHECK-i32-NEXT:    mov h21, v1.h[3]
+; CHECK-i32-NEXT:    fcvtzs w2, h2
+; CHECK-i32-NEXT:    fmov s2, w4
+; CHECK-i32-NEXT:    fmov s1, w6
+; CHECK-i32-NEXT:    fmov s0, w7
+; CHECK-i32-NEXT:    mov h19, v17.h[7]
+; CHECK-i32-NEXT:    fcvtzs w4, h20
 ; CHECK-i32-NEXT:    fcvtzs w6, h18
-; CHECK-i32-NEXT:    mov h18, v0.h[3]
-; CHECK-i32-NEXT:    fmov s0, w22
-; CHECK-i32-NEXT:    fmov s1, w21
-; CHECK-i32-NEXT:    fcvtzs w21, h2
-; CHECK-i32-NEXT:    fmov s2, w25
-; CHECK-i32-NEXT:    fmov s3, w20
-; CHECK-i32-NEXT:    fmov s4, w7
-; CHECK-i32-NEXT:    fmov s5, w3
-; CHECK-i32-NEXT:    fmov s6, w1
-; CHECK-i32-NEXT:    fmov s7, w13
-; CHECK-i32-NEXT:    mov v0.s[1], w24
-; CHECK-i32-NEXT:    mov v1.s[1], w23
-; CHECK-i32-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
-; CHECK-i32-NEXT:    mov v2.s[1], w26
-; CHECK-i32-NEXT:    mov v3.s[1], w19
-; CHECK-i32-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-i32-NEXT:    mov v4.s[1], w4
+; CHECK-i32-NEXT:    mov h17, v17.h[3]
+; CHECK-i32-NEXT:    mov v3.s[1], w3
+; CHECK-i32-NEXT:    mov v1.s[1], w5
+; CHECK-i32-NEXT:    mov v2.s[1], w20
+; CHECK-i32-NEXT:    mov v4.s[1], w1
+; CHECK-i32-NEXT:    mov v0.s[1], w19
 ; CHECK-i32-NEXT:    mov v5.s[1], w0
-; CHECK-i32-NEXT:    mov v6.s[1], w15
+; CHECK-i32-NEXT:    mov v6.s[1], w14
 ; CHECK-i32-NEXT:    mov v7.s[1], w12
 ; CHECK-i32-NEXT:    fcvtzs w12, h16
-; CHECK-i32-NEXT:    fcvtzs w13, h17
-; CHECK-i32-NEXT:    fcvtzs w15, h19
-; CHECK-i32-NEXT:    fcvtzs w0, h18
-; CHECK-i32-NEXT:    mov v0.s[2], w21
-; CHECK-i32-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-i32-NEXT:    mov v1.s[2], w6
-; CHECK-i32-NEXT:    mov v2.s[2], w5
-; CHECK-i32-NEXT:    mov v3.s[2], w2
-; CHECK-i32-NEXT:    mov v4.s[2], w18
-; CHECK-i32-NEXT:    mov v5.s[2], w17
+; CHECK-i32-NEXT:    fcvtzs w14, h21
+; CHECK-i32-NEXT:    fcvtzs w0, h19
+; CHECK-i32-NEXT:    fcvtzs w1, h17
+; CHECK-i32-NEXT:    mov v3.s[2], w18
+; CHECK-i32-NEXT:    mov v1.s[2], w4
+; CHECK-i32-NEXT:    mov v2.s[2], w2
+; CHECK-i32-NEXT:    mov v4.s[2], w17
+; CHECK-i32-NEXT:    mov v0.s[2], w6
+; CHECK-i32-NEXT:    mov v5.s[2], w16
 ; CHECK-i32-NEXT:    mov v6.s[2], w11
 ; CHECK-i32-NEXT:    mov v7.s[2], w10
-; CHECK-i32-NEXT:    mov v0.s[3], w0
-; CHECK-i32-NEXT:    mov v1.s[3], w15
-; CHECK-i32-NEXT:    mov v2.s[3], w13
 ; CHECK-i32-NEXT:    mov v3.s[3], w12
-; CHECK-i32-NEXT:    mov v4.s[3], w16
-; CHECK-i32-NEXT:    mov v5.s[3], w14
+; CHECK-i32-NEXT:    mov v1.s[3], w0
+; CHECK-i32-NEXT:    mov v2.s[3], w14
+; CHECK-i32-NEXT:    mov v4.s[3], w15
+; CHECK-i32-NEXT:    mov v0.s[3], w1
+; CHECK-i32-NEXT:    mov v5.s[3], w13
 ; CHECK-i32-NEXT:    mov v6.s[3], w9
 ; CHECK-i32-NEXT:    mov v7.s[3], w8
-; CHECK-i32-NEXT:    ldp x26, x25, [sp], #64 // 16-byte Folded Reload
+; CHECK-i32-NEXT:    ldp x20, x19, [sp], #16 // 16-byte Folded Reload
 ; CHECK-i32-NEXT:    ret
 ;
 ; CHECK-i64-LABEL: lrint_v32f16:
@@ -567,54 +561,52 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) nounwind {
 ; CHECK-i32-NEXT:    ptrue p0.s, vl8
 ; CHECK-i32-NEXT:    movprfx z2, z0
 ; CHECK-i32-NEXT:    frintx z2.s, p0/m, z0.s
-; CHECK-i32-NEXT:    mov z0.s, z2.s[4]
-; CHECK-i32-NEXT:    mov z1.s, z2.s[5]
+; CHECK-i32-NEXT:    mov z0.s, z2.s[5]
+; CHECK-i32-NEXT:    mov z1.s, z2.s[4]
 ; CHECK-i32-NEXT:    mov z3.s, z2.s[1]
-; CHECK-i32-NEXT:    fcvtzs w9, s2
+; CHECK-i32-NEXT:    mov z4.s, z2.s[6]
+; CHECK-i32-NEXT:    mov z5.s, z2.s[2]
 ; CHECK-i32-NEXT:    fcvtzs w8, s0
-; CHECK-i32-NEXT:    mov z0.s, z2.s[6]
-; CHECK-i32-NEXT:    fcvtzs w10, s1
-; CHECK-i32-NEXT:    mov z1.s, z2.s[2]
-; CHECK-i32-NEXT:    fcvtzs w11, s3
+; CHECK-i32-NEXT:    fcvtzs s1, s1
+; CHECK-i32-NEXT:    fcvtzs w9, s3
+; CHECK-i32-NEXT:    fcvtzs s0, s2
+; CHECK-i32-NEXT:    fcvtzs w10, s4
+; CHECK-i32-NEXT:    fcvtzs w11, s5
 ; CHECK-i32-NEXT:    mov z3.s, z2.s[7]
 ; CHECK-i32-NEXT:    mov z2.s, z2.s[3]
-; CHECK-i32-NEXT:    fcvtzs w12, s0
-; CHECK-i32-NEXT:    fmov s0, w9
-; CHECK-i32-NEXT:    fcvtzs w13, s1
-; CHECK-i32-NEXT:    fmov s1, w8
+; CHECK-i32-NEXT:    mov v1.s[1], w8
+; CHECK-i32-NEXT:    mov v0.s[1], w9
 ; CHECK-i32-NEXT:    fcvtzs w8, s3
 ; CHECK-i32-NEXT:    fcvtzs w9, s2
-; CHECK-i32-NEXT:    mov v0.s[1], w11
-; CHECK-i32-NEXT:    mov v1.s[1], w10
-; CHECK-i32-NEXT:    mov v0.s[2], w13
-; CHECK-i32-NEXT:    mov v1.s[2], w12
-; CHECK-i32-NEXT:    mov v0.s[3], w9
+; CHECK-i32-NEXT:    mov v1.s[2], w10
+; CHECK-i32-NEXT:    mov v0.s[2], w11
 ; CHECK-i32-NEXT:    mov v1.s[3], w8
+; CHECK-i32-NEXT:    mov v0.s[3], w9
 ; CHECK-i32-NEXT:    ret
 ;
 ; CHECK-i64-LABEL: lrint_v8f32:
 ; CHECK-i64:       // %bb.0:
-; CHECK-i64-NEXT:    frintx v0.4s, v0.4s
 ; CHECK-i64-NEXT:    frintx v1.4s, v1.4s
-; CHECK-i64-NEXT:    mov s3, v1.s[2]
-; CHECK-i64-NEXT:    mov s4, v0.s[2]
-; CHECK-i64-NEXT:    mov s2, v0.s[1]
+; CHECK-i64-NEXT:    frintx v0.4s, v0.4s
+; CHECK-i64-NEXT:    mov s2, v1.s[2]
+; CHECK-i64-NEXT:    mov s3, v0.s[2]
+; CHECK-i64-NEXT:    mov s4, v0.s[1]
 ; CHECK-i64-NEXT:    mov s5, v1.s[3]
 ; CHECK-i64-NEXT:    mov s6, v1.s[1]
 ; CHECK-i64-NEXT:    mov s7, v0.s[3]
 ; CHECK-i64-NEXT:    fcvtzs x8, s0
 ; CHECK-i64-NEXT:    fcvtzs x10, s1
+; CHECK-i64-NEXT:    fcvtzs x9, s2
 ; CHECK-i64-NEXT:    fcvtzs x11, s3
 ; CHECK-i64-NEXT:    fcvtzs x12, s4
-; CHECK-i64-NEXT:    fcvtzs x9, s2
 ; CHECK-i64-NEXT:    fcvtzs x13, s5
 ; CHECK-i64-NEXT:    fcvtzs x14, s6
 ; CHECK-i64-NEXT:    fcvtzs x15, s7
 ; CHECK-i64-NEXT:    fmov d0, x8
 ; CHECK-i64-NEXT:    fmov d2, x10
-; CHECK-i64-NEXT:    fmov d1, x12
-; CHECK-i64-NEXT:    fmov d3, x11
-; CHECK-i64-NEXT:    mov v0.d[1], x9
+; CHECK-i64-NEXT:    fmov d3, x9
+; CHECK-i64-NEXT:    fmov d1, x11
+; CHECK-i64-NEXT:    mov v0.d[1], x12
 ; CHECK-i64-NEXT:    mov v2.d[1], x14
 ; CHECK-i64-NEXT:    mov v1.d[1], x15
 ; CHECK-i64-NEXT:    mov v3.d[1], x13
@@ -629,61 +621,58 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) nounwind {
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ptrue p0.d, vl2
 ; CHECK-i32-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-i32-NEXT:    // kill: def $q3 killed $q3 def $z3
 ; CHECK-i32-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-i32-NEXT:    // kill: def $q3 killed $q3 def $z3
 ; CHECK-i32-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-i32-NEXT:    splice z2.d, p0, z2.d, z3.d
 ; CHECK-i32-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-i32-NEXT:    ptrue p0.s, vl8
-; CHECK-i32-NEXT:    movprfx z1, z2
-; CHECK-i32-NEXT:    frintx z1.s, p0/m, z2.s
-; CHECK-i32-NEXT:    frintx z0.s, p0/m, z0.s
-; CHECK-i32-NEXT:    mov z2.s, z1.s[5]
-; CHECK-i32-NEXT:    mov z3.s, z1.s[4]
-; CHECK-i32-NEXT:    mov z5.s, z0.s[5]
-; CHECK-i32-NEXT:    mov z7.s, z0.s[1]
-; CHECK-i32-NEXT:    fcvtzs w11, s0
-; CHECK-i32-NEXT:    fcvtzs w13, s1
-; CHECK-i32-NEXT:    mov z4.s, z1.s[7]
-; CHECK-i32-NEXT:    mov z6.s, z1.s[6]
-; CHECK-i32-NEXT:    mov z16.s, z0.s[7]
-; CHECK-i32-NEXT:    fcvtzs w8, s2
-; CHECK-i32-NEXT:    mov z2.s, z0.s[4]
-; CHECK-i32-NEXT:    fcvtzs w9, s3
-; CHECK-i32-NEXT:    mov z3.s, z1.s[1]
-; CHECK-i32-NEXT:    fcvtzs w10, s5
-; CHECK-i32-NEXT:    fcvtzs w12, s7
-; CHECK-i32-NEXT:    mov z5.s, z0.s[6]
-; CHECK-i32-NEXT:    mov z7.s, z1.s[2]
-; CHECK-i32-NEXT:    mov z17.s, z1.s[3]
-; CHECK-i32-NEXT:    fcvtzs w14, s2
-; CHECK-i32-NEXT:    mov z2.s, z0.s[2]
-; CHECK-i32-NEXT:    mov z18.s, z0.s[3]
-; CHECK-i32-NEXT:    fcvtzs w15, s3
-; CHECK-i32-NEXT:    fmov s0, w11
-; CHECK-i32-NEXT:    fmov s3, w9
-; CHECK-i32-NEXT:    fcvtzs w16, s6
-; CHECK-i32-NEXT:    fcvtzs w17, s5
+; CHECK-i32-NEXT:    movprfx z4, z2
+; CHECK-i32-NEXT:    frintx z4.s, p0/m, z2.s
+; CHECK-i32-NEXT:    movprfx z5, z0
+; CHECK-i32-NEXT:    frintx z5.s, p0/m, z0.s
+; CHECK-i32-NEXT:    mov z0.s, z4.s[5]
+; CHECK-i32-NEXT:    mov z1.s, z5.s[5]
+; CHECK-i32-NEXT:    mov z3.s, z4.s[4]
+; CHECK-i32-NEXT:    mov z2.s, z4.s[1]
+; CHECK-i32-NEXT:    mov z7.s, z5.s[1]
+; CHECK-i32-NEXT:    mov z17.s, z5.s[4]
+; CHECK-i32-NEXT:    mov z6.s, z4.s[6]
+; CHECK-i32-NEXT:    mov z16.s, z5.s[6]
+; CHECK-i32-NEXT:    mov z18.s, z4.s[2]
+; CHECK-i32-NEXT:    fcvtzs w8, s0
+; CHECK-i32-NEXT:    fcvtzs w9, s1
+; CHECK-i32-NEXT:    fcvtzs s0, s5
+; CHECK-i32-NEXT:    fcvtzs w10, s2
 ; CHECK-i32-NEXT:    fcvtzs w11, s7
-; CHECK-i32-NEXT:    fcvtzs w18, s2
-; CHECK-i32-NEXT:    fmov s2, w13
-; CHECK-i32-NEXT:    fcvtzs w9, s16
-; CHECK-i32-NEXT:    fmov s1, w14
-; CHECK-i32-NEXT:    mov v0.s[1], w12
+; CHECK-i32-NEXT:    fcvtzs s2, s4
+; CHECK-i32-NEXT:    fcvtzs s3, s3
+; CHECK-i32-NEXT:    fcvtzs s1, s17
+; CHECK-i32-NEXT:    mov z19.s, z5.s[2]
+; CHECK-i32-NEXT:    fcvtzs w12, s6
+; CHECK-i32-NEXT:    fcvtzs w13, s16
+; CHECK-i32-NEXT:    fcvtzs w14, s18
+; CHECK-i32-NEXT:    mov z6.s, z4.s[7]
+; CHECK-i32-NEXT:    mov z7.s, z5.s[7]
+; CHECK-i32-NEXT:    mov z4.s, z4.s[3]
+; CHECK-i32-NEXT:    fcvtzs w15, s19
+; CHECK-i32-NEXT:    mov v0.s[1], w11
+; CHECK-i32-NEXT:    mov v2.s[1], w10
+; CHECK-i32-NEXT:    mov v1.s[1], w9
 ; CHECK-i32-NEXT:    mov v3.s[1], w8
-; CHECK-i32-NEXT:    fcvtzs w8, s4
-; CHECK-i32-NEXT:    fcvtzs w12, s18
-; CHECK-i32-NEXT:    mov v2.s[1], w15
-; CHECK-i32-NEXT:    mov v1.s[1], w10
-; CHECK-i32-NEXT:    fcvtzs w10, s17
-; CHECK-i32-NEXT:    mov v0.s[2], w18
-; CHECK-i32-NEXT:    mov v3.s[2], w16
-; CHECK-i32-NEXT:    mov v2.s[2], w11
-; CHECK-i32-NEXT:    mov v1.s[2], w17
-; CHECK-i32-NEXT:    mov v0.s[3], w12
-; CHECK-i32-NEXT:    mov v3.s[3], w8
+; CHECK-i32-NEXT:    mov z5.s, z5.s[3]
+; CHECK-i32-NEXT:    fcvtzs w8, s6
+; CHECK-i32-NEXT:    fcvtzs w9, s7
+; CHECK-i32-NEXT:    fcvtzs w10, s4
+; CHECK-i32-NEXT:    fcvtzs w11, s5
+; CHECK-i32-NEXT:    mov v0.s[2], w15
+; CHECK-i32-NEXT:    mov v2.s[2], w14
+; CHECK-i32-NEXT:    mov v1.s[2], w13
+; CHECK-i32-NEXT:    mov v3.s[2], w12
+; CHECK-i32-NEXT:    mov v0.s[3], w11
 ; CHECK-i32-NEXT:    mov v2.s[3], w10
 ; CHECK-i32-NEXT:    mov v1.s[3], w9
+; CHECK-i32-NEXT:    mov v3.s[3], w8
 ; CHECK-i32-NEXT:    ret
 ;
 ; CHECK-i64-LABEL: lrint_v16f32:
@@ -694,48 +683,48 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) nounwind {
 ; CHECK-i64-NEXT:    frintx v0.4s, v0.4s
 ; CHECK-i64-NEXT:    mov s4, v3.s[2]
 ; CHECK-i64-NEXT:    mov s5, v2.s[2]
-; CHECK-i64-NEXT:    mov s6, v1.s[2]
-; CHECK-i64-NEXT:    mov s7, v0.s[2]
-; CHECK-i64-NEXT:    fcvtzs x10, s1
-; CHECK-i64-NEXT:    fcvtzs x11, s0
-; CHECK-i64-NEXT:    mov s16, v0.s[1]
-; CHECK-i64-NEXT:    mov s17, v1.s[1]
-; CHECK-i64-NEXT:    mov s18, v3.s[1]
-; CHECK-i64-NEXT:    fcvtzs x14, s3
-; CHECK-i64-NEXT:    fcvtzs x16, s2
-; CHECK-i64-NEXT:    fcvtzs x8, s4
-; CHECK-i64-NEXT:    mov s4, v2.s[1]
-; CHECK-i64-NEXT:    fcvtzs x9, s5
-; CHECK-i64-NEXT:    mov s5, v1.s[3]
-; CHECK-i64-NEXT:    fcvtzs x12, s6
-; CHECK-i64-NEXT:    mov s6, v0.s[3]
-; CHECK-i64-NEXT:    fcvtzs x13, s7
-; CHECK-i64-NEXT:    mov s7, v3.s[3]
-; CHECK-i64-NEXT:    fmov d0, x11
-; CHECK-i64-NEXT:    fcvtzs x17, s16
-; CHECK-i64-NEXT:    fcvtzs x18, s18
-; CHECK-i64-NEXT:    fcvtzs x15, s4
-; CHECK-i64-NEXT:    mov s4, v2.s[3]
-; CHECK-i64-NEXT:    fmov d2, x10
+; CHECK-i64-NEXT:    mov s6, v2.s[1]
+; CHECK-i64-NEXT:    mov s7, v1.s[2]
+; CHECK-i64-NEXT:    fcvtzs x8, s3
+; CHECK-i64-NEXT:    mov s16, v0.s[2]
+; CHECK-i64-NEXT:    fcvtzs x9, s2
+; CHECK-i64-NEXT:    mov s17, v1.s[3]
+; CHECK-i64-NEXT:    mov s18, v0.s[1]
+; CHECK-i64-NEXT:    mov s19, v3.s[3]
+; CHECK-i64-NEXT:    fcvtzs x14, s1
+; CHECK-i64-NEXT:    mov s1, v1.s[1]
+; CHECK-i64-NEXT:    fcvtzs x10, s4
 ; CHECK-i64-NEXT:    fcvtzs x11, s5
-; CHECK-i64-NEXT:    fcvtzs x10, s6
-; CHECK-i64-NEXT:    fmov d3, x12
-; CHECK-i64-NEXT:    fmov d1, x13
-; CHECK-i64-NEXT:    fcvtzs x12, s17
+; CHECK-i64-NEXT:    mov s5, v0.s[3]
+; CHECK-i64-NEXT:    mov s3, v3.s[1]
+; CHECK-i64-NEXT:    mov s2, v2.s[3]
+; CHECK-i64-NEXT:    fcvtzs x12, s6
 ; CHECK-i64-NEXT:    fcvtzs x13, s7
-; CHECK-i64-NEXT:    fmov d5, x9
-; CHECK-i64-NEXT:    fmov d6, x14
-; CHECK-i64-NEXT:    fmov d7, x8
-; CHECK-i64-NEXT:    fcvtzs x0, s4
-; CHECK-i64-NEXT:    fmov d4, x16
+; CHECK-i64-NEXT:    fcvtzs x15, s16
+; CHECK-i64-NEXT:    fmov d6, x8
+; CHECK-i64-NEXT:    fcvtzs x8, s0
+; CHECK-i64-NEXT:    fmov d4, x9
+; CHECK-i64-NEXT:    fcvtzs x9, s17
+; CHECK-i64-NEXT:    fcvtzs x16, s5
+; CHECK-i64-NEXT:    fcvtzs x17, s18
+; CHECK-i64-NEXT:    fmov d7, x10
+; CHECK-i64-NEXT:    fmov d5, x11
+; CHECK-i64-NEXT:    fcvtzs x10, s1
+; CHECK-i64-NEXT:    fcvtzs x11, s19
+; CHECK-i64-NEXT:    fcvtzs x18, s3
+; CHECK-i64-NEXT:    fcvtzs x0, s2
+; CHECK-i64-NEXT:    fmov d3, x13
+; CHECK-i64-NEXT:    fmov d1, x15
+; CHECK-i64-NEXT:    fmov d0, x8
+; CHECK-i64-NEXT:    fmov d2, x14
+; CHECK-i64-NEXT:    mov v4.d[1], x12
+; CHECK-i64-NEXT:    mov v3.d[1], x9
+; CHECK-i64-NEXT:    mov v7.d[1], x11
 ; CHECK-i64-NEXT:    mov v0.d[1], x17
-; CHECK-i64-NEXT:    mov v1.d[1], x10
-; CHECK-i64-NEXT:    mov v3.d[1], x11
-; CHECK-i64-NEXT:    mov v2.d[1], x12
-; CHECK-i64-NEXT:    mov v6.d[1], x18
-; CHECK-i64-NEXT:    mov v7.d[1], x13
-; CHECK-i64-NEXT:    mov v4.d[1], x15
+; CHECK-i64-NEXT:    mov v1.d[1], x16
+; CHECK-i64-NEXT:    mov v2.d[1], x10
 ; CHECK-i64-NEXT:    mov v5.d[1], x0
+; CHECK-i64-NEXT:    mov v6.d[1], x18
 ; CHECK-i64-NEXT:    ret
   %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x)
   ret <16 x iXLen> %a
@@ -745,128 +734,114 @@ declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)
 define <32 x iXLen> @lrint_v32f32(<32 x float> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v32f32:
 ; CHECK-i32:       // %bb.0:
-; CHECK-i32-NEXT:    str x27, [sp, #-80]! // 8-byte Folded Spill
+; CHECK-i32-NEXT:    str x19, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-i32-NEXT:    ptrue p1.d, vl2
 ; CHECK-i32-NEXT:    // kill: def $q6 killed $q6 def $z6
 ; CHECK-i32-NEXT:    // kill: def $q7 killed $q7 def $z7
-; CHECK-i32-NEXT:    // kill: def $q2 killed $q2 def $z2
 ; CHECK-i32-NEXT:    // kill: def $q4 killed $q4 def $z4
-; CHECK-i32-NEXT:    // kill: def $q3 killed $q3 def $z3
+; CHECK-i32-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-i32-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-i32-NEXT:    // kill: def $q5 killed $q5 def $z5
+; CHECK-i32-NEXT:    // kill: def $q3 killed $q3 def $z3
 ; CHECK-i32-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-i32-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-i32-NEXT:    stp x24, x23, [sp, #32] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    ptrue p0.s, vl8
-; CHECK-i32-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    splice z6.d, p1, z6.d, z7.d
-; CHECK-i32-NEXT:    splice z2.d, p1, z2.d, z3.d
 ; CHECK-i32-NEXT:    splice z4.d, p1, z4.d, z5.d
+; CHECK-i32-NEXT:    splice z2.d, p1, z2.d, z3.d
 ; CHECK-i32-NEXT:    splice z0.d, p1, z0.d, z1.d
-; CHECK-i32-NEXT:    stp x26, x25, [sp, #16] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    movprfx z3, z6
-; CHECK-i32-NEXT:    frintx z3.s, p0/m, z6.s
-; CHECK-i32-NEXT:    frintx z2.s, p0/m, z2.s
-; CHECK-i32-NEXT:    movprfx z1, z4
-; CHECK-i32-NEXT:    frintx z1.s, p0/m, z4.s
-; CHECK-i32-NEXT:    frintx z0.s, p0/m, z0.s
-; CHECK-i32-NEXT:    mov z4.s, z3.s[7]
-; CHECK-i32-NEXT:    mov z5.s, z3.s[6]
-; CHECK-i32-NEXT:    mov z6.s, z3.s[5]
-; CHECK-i32-NEXT:    mov z16.s, z1.s[7]
-; CHECK-i32-NEXT:    mov z7.s, z3.s[4]
-; CHECK-i32-NEXT:    mov z17.s, z1.s[6]
-; CHECK-i32-NEXT:    mov z18.s, z1.s[5]
-; CHECK-i32-NEXT:    mov z19.s, z1.s[4]
-; CHECK-i32-NEXT:    fcvtzs w7, s3
-; CHECK-i32-NEXT:    fcvtzs w8, s4
-; CHECK-i32-NEXT:    mov z4.s, z2.s[7]
-; CHECK-i32-NEXT:    fcvtzs w10, s5
-; CHECK-i32-NEXT:    mov z5.s, z2.s[6]
-; CHECK-i32-NEXT:    fcvtzs w13, s6
-; CHECK-i32-NEXT:    fcvtzs w9, s16
-; CHECK-i32-NEXT:    mov z6.s, z2.s[4]
-; CHECK-i32-NEXT:    mov z16.s, z0.s[6]
-; CHECK-i32-NEXT:    fcvtzs w14, s7
-; CHECK-i32-NEXT:    fcvtzs w11, s4
-; CHECK-i32-NEXT:    mov z4.s, z2.s[5]
-; CHECK-i32-NEXT:    mov z7.s, z0.s[7]
-; CHECK-i32-NEXT:    fcvtzs w16, s5
-; CHECK-i32-NEXT:    mov z5.s, z0.s[4]
-; CHECK-i32-NEXT:    fcvtzs w12, s17
-; CHECK-i32-NEXT:    fcvtzs w15, s18
-; CHECK-i32-NEXT:    fcvtzs w17, s19
-; CHECK-i32-NEXT:    mov z17.s, z0.s[5]
-; CHECK-i32-NEXT:    fcvtzs w3, s4
-; CHECK-i32-NEXT:    mov z4.s, z3.s[1]
-; CHECK-i32-NEXT:    mov z18.s, z3.s[2]
-; CHECK-i32-NEXT:    fcvtzs w4, s6
-; CHECK-i32-NEXT:    fcvtzs w0, s16
-; CHECK-i32-NEXT:    fcvtzs w6, s5
-; CHECK-i32-NEXT:    mov z16.s, z3.s[3]
-; CHECK-i32-NEXT:    mov z3.s, z0.s[1]
-; CHECK-i32-NEXT:    mov z5.s, z1.s[1]
-; CHECK-i32-NEXT:    mov z6.s, z2.s[1]
-; CHECK-i32-NEXT:    fcvtzs w21, s1
-; CHECK-i32-NEXT:    fcvtzs w22, s0
-; CHECK-i32-NEXT:    fcvtzs w23, s2
-; CHECK-i32-NEXT:    fcvtzs w18, s7
-; CHECK-i32-NEXT:    fcvtzs w2, s4
-; CHECK-i32-NEXT:    mov z4.s, z1.s[2]
-; CHECK-i32-NEXT:    mov z7.s, z2.s[2]
-; CHECK-i32-NEXT:    fcvtzs w5, s17
-; CHECK-i32-NEXT:    fcvtzs w24, s3
-; CHECK-i32-NEXT:    fcvtzs w25, s5
-; CHECK-i32-NEXT:    fcvtzs w26, s6
-; CHECK-i32-NEXT:    fcvtzs w1, s18
-; CHECK-i32-NEXT:    mov z18.s, z0.s[2]
-; CHECK-i32-NEXT:    mov z17.s, z1.s[3]
-; CHECK-i32-NEXT:    fcvtzs w19, s4
-; CHECK-i32-NEXT:    mov z19.s, z2.s[3]
-; CHECK-i32-NEXT:    fcvtzs w20, s7
-; CHECK-i32-NEXT:    mov z20.s, z0.s[3]
-; CHECK-i32-NEXT:    fmov s0, w22
-; CHECK-i32-NEXT:    fmov s2, w23
-; CHECK-i32-NEXT:    fmov s4, w21
-; CHECK-i32-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-i32-NEXT:    fmov s1, w6
-; CHECK-i32-NEXT:    fmov s6, w7
-; CHECK-i32-NEXT:    fmov s3, w4
-; CHECK-i32-NEXT:    fmov s5, w17
-; CHECK-i32-NEXT:    fmov s7, w14
-; CHECK-i32-NEXT:    fcvtzs w27, s18
-; CHECK-i32-NEXT:    mov v0.s[1], w24
-; CHECK-i32-NEXT:    ldp x24, x23, [sp, #32] // 16-byte Folded Reload
-; CHECK-i32-NEXT:    mov v2.s[1], w26
-; CHECK-i32-NEXT:    mov v4.s[1], w25
-; CHECK-i32-NEXT:    mov v1.s[1], w5
-; CHECK-i32-NEXT:    ldp x26, x25, [sp, #16] // 16-byte Folded Reload
-; CHECK-i32-NEXT:    mov v3.s[1], w3
-; CHECK-i32-NEXT:    mov v6.s[1], w2
-; CHECK-i32-NEXT:    mov v5.s[1], w15
+; CHECK-i32-NEXT:    movprfx z16, z6
+; CHECK-i32-NEXT:    frintx z16.s, p0/m, z6.s
+; CHECK-i32-NEXT:    movprfx z17, z4
+; CHECK-i32-NEXT:    frintx z17.s, p0/m, z4.s
+; CHECK-i32-NEXT:    movprfx z18, z2
+; CHECK-i32-NEXT:    frintx z18.s, p0/m, z2.s
+; CHECK-i32-NEXT:    movprfx z19, z0
+; CHECK-i32-NEXT:    frintx z19.s, p0/m, z0.s
+; CHECK-i32-NEXT:    mov z0.s, z16.s[7]
+; CHECK-i32-NEXT:    mov z2.s, z16.s[5]
+; CHECK-i32-NEXT:    mov z3.s, z16.s[4]
+; CHECK-i32-NEXT:    mov z1.s, z16.s[6]
+; CHECK-i32-NEXT:    mov z4.s, z17.s[7]
+; CHECK-i32-NEXT:    mov z6.s, z17.s[6]
+; CHECK-i32-NEXT:    mov z20.s, z17.s[5]
+; CHECK-i32-NEXT:    mov z5.s, z17.s[4]
+; CHECK-i32-NEXT:    mov z21.s, z19.s[1]
+; CHECK-i32-NEXT:    fcvtzs w8, s0
+; CHECK-i32-NEXT:    mov z0.s, z18.s[7]
+; CHECK-i32-NEXT:    fcvtzs w13, s2
+; CHECK-i32-NEXT:    mov z2.s, z18.s[5]
+; CHECK-i32-NEXT:    fcvtzs s7, s3
+; CHECK-i32-NEXT:    mov z3.s, z19.s[7]
+; CHECK-i32-NEXT:    fcvtzs w10, s1
+; CHECK-i32-NEXT:    mov z1.s, z18.s[6]
+; CHECK-i32-NEXT:    fcvtzs w9, s4
+; CHECK-i32-NEXT:    fcvtzs w12, s6
+; CHECK-i32-NEXT:    fcvtzs w11, s0
+; CHECK-i32-NEXT:    mov z0.s, z19.s[6]
+; CHECK-i32-NEXT:    mov z4.s, z19.s[5]
+; CHECK-i32-NEXT:    fcvtzs w18, s2
+; CHECK-i32-NEXT:    mov z2.s, z18.s[4]
+; CHECK-i32-NEXT:    fcvtzs w15, s3
+; CHECK-i32-NEXT:    mov z3.s, z16.s[1]
+; CHECK-i32-NEXT:    mov z6.s, z17.s[1]
+; CHECK-i32-NEXT:    fcvtzs w14, s1
+; CHECK-i32-NEXT:    mov z1.s, z16.s[2]
+; CHECK-i32-NEXT:    fcvtzs w17, s0
+; CHECK-i32-NEXT:    fcvtzs w1, s4
+; CHECK-i32-NEXT:    mov z0.s, z17.s[2]
+; CHECK-i32-NEXT:    mov z4.s, z19.s[4]
+; CHECK-i32-NEXT:    fcvtzs w2, s3
+; CHECK-i32-NEXT:    fcvtzs s3, s2
+; CHECK-i32-NEXT:    mov z2.s, z18.s[1]
+; CHECK-i32-NEXT:    fcvtzs w6, s6
+; CHECK-i32-NEXT:    mov z6.s, z19.s[2]
+; CHECK-i32-NEXT:    fcvtzs w16, s20
+; CHECK-i32-NEXT:    fcvtzs w0, s1
+; CHECK-i32-NEXT:    fcvtzs w3, s0
+; CHECK-i32-NEXT:    fcvtzs s1, s4
+; CHECK-i32-NEXT:    fcvtzs w7, s21
+; CHECK-i32-NEXT:    fcvtzs s0, s19
+; CHECK-i32-NEXT:    fcvtzs s4, s17
+; CHECK-i32-NEXT:    fcvtzs w19, s2
+; CHECK-i32-NEXT:    fcvtzs s2, s18
+; CHECK-i32-NEXT:    fcvtzs s5, s5
+; CHECK-i32-NEXT:    fcvtzs w5, s6
+; CHECK-i32-NEXT:    fcvtzs s6, s16
+; CHECK-i32-NEXT:    mov z20.s, z18.s[2]
+; CHECK-i32-NEXT:    mov v1.s[1], w1
+; CHECK-i32-NEXT:    mov v3.s[1], w18
 ; CHECK-i32-NEXT:    mov v7.s[1], w13
+; CHECK-i32-NEXT:    mov v0.s[1], w7
+; CHECK-i32-NEXT:    mov v4.s[1], w6
+; CHECK-i32-NEXT:    mov z16.s, z16.s[3]
+; CHECK-i32-NEXT:    fcvtzs w4, s20
+; CHECK-i32-NEXT:    mov v2.s[1], w19
+; CHECK-i32-NEXT:    mov v5.s[1], w16
+; CHECK-i32-NEXT:    mov v6.s[1], w2
+; CHECK-i32-NEXT:    mov z17.s, z17.s[3]
+; CHECK-i32-NEXT:    mov z18.s, z18.s[3]
+; CHECK-i32-NEXT:    mov z19.s, z19.s[3]
 ; CHECK-i32-NEXT:    fcvtzs w13, s16
-; CHECK-i32-NEXT:    fcvtzs w14, s17
-; CHECK-i32-NEXT:    fcvtzs w15, s19
-; CHECK-i32-NEXT:    fcvtzs w17, s20
-; CHECK-i32-NEXT:    mov v0.s[2], w27
-; CHECK-i32-NEXT:    mov v1.s[2], w0
-; CHECK-i32-NEXT:    mov v2.s[2], w20
-; CHECK-i32-NEXT:    mov v4.s[2], w19
-; CHECK-i32-NEXT:    mov v3.s[2], w16
-; CHECK-i32-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-i32-NEXT:    mov v6.s[2], w1
+; CHECK-i32-NEXT:    mov v1.s[2], w17
+; CHECK-i32-NEXT:    mov v0.s[2], w5
+; CHECK-i32-NEXT:    mov v4.s[2], w3
+; CHECK-i32-NEXT:    mov v3.s[2], w14
+; CHECK-i32-NEXT:    fcvtzs w16, s17
+; CHECK-i32-NEXT:    fcvtzs w18, s18
+; CHECK-i32-NEXT:    mov v2.s[2], w4
+; CHECK-i32-NEXT:    fcvtzs w1, s19
+; CHECK-i32-NEXT:    mov v6.s[2], w0
 ; CHECK-i32-NEXT:    mov v5.s[2], w12
 ; CHECK-i32-NEXT:    mov v7.s[2], w10
-; CHECK-i32-NEXT:    mov v0.s[3], w17
-; CHECK-i32-NEXT:    mov v1.s[3], w18
-; CHECK-i32-NEXT:    mov v2.s[3], w15
-; CHECK-i32-NEXT:    mov v4.s[3], w14
+; CHECK-i32-NEXT:    mov v1.s[3], w15
 ; CHECK-i32-NEXT:    mov v3.s[3], w11
+; CHECK-i32-NEXT:    mov v2.s[3], w18
+; CHECK-i32-NEXT:    mov v4.s[3], w16
+; CHECK-i32-NEXT:    mov v0.s[3], w1
 ; CHECK-i32-NEXT:    mov v6.s[3], w13
 ; CHECK-i32-NEXT:    mov v5.s[3], w9
 ; CHECK-i32-NEXT:    mov v7.s[3], w8
-; CHECK-i32-NEXT:    ldr x27, [sp], #80 // 8-byte Folded Reload
+; CHECK-i32-NEXT:    ldr x19, [sp], #16 // 8-byte Folded Reload
 ; CHECK-i32-NEXT:    ret
 ;
 ; CHECK-i64-LABEL: lrint_v32f32:
@@ -1006,8 +981,7 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) nounwind {
 ; CHECK-i64-LABEL: lrint_v1f64:
 ; CHECK-i64:       // %bb.0:
 ; CHECK-i64-NEXT:    frintx d0, d0
-; CHECK-i64-NEXT:    fcvtzs x8, d0
-; CHECK-i64-NEXT:    fmov d0, x8
+; CHECK-i64-NEXT:    fcvtzs d0, d0
 ; CHECK-i64-NEXT:    ret
   %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x)
   ret <1 x iXLen> %a
@@ -1067,17 +1041,15 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) nounwind {
 ; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-i64-NEXT:    ptrue p0.d, vl4
 ; CHECK-i64-NEXT:    frintx z0.d, p0/m, z0.d
-; CHECK-i64-NEXT:    mov z1.d, z0.d[2]
-; CHECK-i64-NEXT:    mov z2.d, z0.d[3]
+; CHECK-i64-NEXT:    mov z1.d, z0.d[3]
+; CHECK-i64-NEXT:    mov z2.d, z0.d[2]
 ; CHECK-i64-NEXT:    mov z3.d, z0.d[1]
-; CHECK-i64-NEXT:    fcvtzs x9, d0
+; CHECK-i64-NEXT:    fcvtzs d0, d0
 ; CHECK-i64-NEXT:    fcvtzs x8, d1
-; CHECK-i64-NEXT:    fcvtzs x10, d2
-; CHECK-i64-NEXT:    fcvtzs x11, d3
-; CHECK-i64-NEXT:    fmov d0, x9
-; CHECK-i64-NEXT:    fmov d1, x8
-; CHECK-i64-NEXT:    mov v0.d[1], x11
-; CHECK-i64-NEXT:    mov v1.d[1], x10
+; CHECK-i64-NEXT:    fcvtzs d1, d2
+; CHECK-i64-NEXT:    fcvtzs x9, d3
+; CHECK-i64-NEXT:    mov v0.d[1], x9
+; CHECK-i64-NEXT:    mov v1.d[1], x8
 ; CHECK-i64-NEXT:    ret
   %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x)
   ret <4 x iXLen> %a
@@ -1132,31 +1104,27 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) nounwind {
 ; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-i64-NEXT:    splice z2.d, p0, z2.d, z3.d
 ; CHECK-i64-NEXT:    ptrue p0.d, vl4
-; CHECK-i64-NEXT:    frintx z0.d, p0/m, z0.d
 ; CHECK-i64-NEXT:    movprfx z1, z2
 ; CHECK-i64-NEXT:    frintx z1.d, p0/m, z2.d
-; CHECK-i64-NEXT:    mov z4.d, z1.d[2]
-; CHECK-i64-NEXT:    mov z5.d, z0.d[2]
-; CHECK-i64-NEXT:    mov z2.d, z0.d[1]
-; CHECK-i64-NEXT:    mov z3.d, z1.d[3]
-; CHECK-i64-NEXT:    mov z6.d, z0.d[3]
-; CHECK-i64-NEXT:    fcvtzs x8, d0
-; CHECK-i64-NEXT:    mov z0.d, z1.d[1]
-; CHECK-i64-NEXT:    fcvtzs x10, d1
-; CHECK-i64-NEXT:    fcvtzs x11, d4
-; CHECK-i64-NEXT:    fcvtzs x12, d5
-; CHECK-i64-NEXT:    fcvtzs x9, d2
-; CHECK-i64-NEXT:    fcvtzs x13, d3
-; CHECK-i64-NEXT:    fcvtzs x14, d6
-; CHECK-i64-NEXT:    fcvtzs x15, d0
-; CHECK-i64-NEXT:    fmov d0, x8
-; CHECK-i64-NEXT:    fmov d2, x10
-; CHECK-i64-NEXT:    fmov d1, x12
-; CHECK-i64-NEXT:    fmov d3, x11
-; CHECK-i64-NEXT:    mov v0.d[1], x9
-; CHECK-i64-NEXT:    mov v2.d[1], x15
-; CHECK-i64-NEXT:    mov v1.d[1], x14
-; CHECK-i64-NEXT:    mov v3.d[1], x13
+; CHECK-i64-NEXT:    frintx z0.d, p0/m, z0.d
+; CHECK-i64-NEXT:    mov z2.d, z1.d[3]
+; CHECK-i64-NEXT:    mov z3.d, z0.d[3]
+; CHECK-i64-NEXT:    mov z4.d, z0.d[1]
+; CHECK-i64-NEXT:    mov z5.d, z1.d[2]
+; CHECK-i64-NEXT:    mov z6.d, z0.d[2]
+; CHECK-i64-NEXT:    mov z7.d, z1.d[1]
+; CHECK-i64-NEXT:    fcvtzs d0, d0
+; CHECK-i64-NEXT:    fcvtzs x8, d2
+; CHECK-i64-NEXT:    fcvtzs x9, d3
+; CHECK-i64-NEXT:    fcvtzs x10, d4
+; CHECK-i64-NEXT:    fcvtzs d2, d1
+; CHECK-i64-NEXT:    fcvtzs d3, d5
+; CHECK-i64-NEXT:    fcvtzs d1, d6
+; CHECK-i64-NEXT:    fcvtzs x11, d7
+; CHECK-i64-NEXT:    mov v0.d[1], x10
+; CHECK-i64-NEXT:    mov v1.d[1], x9
+; CHECK-i64-NEXT:    mov v3.d[1], x8
+; CHECK-i64-NEXT:    mov v2.d[1], x11
 ; CHECK-i64-NEXT:    ret
   %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x)
   ret <8 x iXLen> %a
@@ -1234,70 +1202,60 @@ define <16 x iXLen> @lrint_v16f64(<16 x double> %x) nounwind {
 ;
 ; CHECK-i64-LABEL: lrint_v16f64:
 ; CHECK-i64:       // %bb.0:
-; CHECK-i64-NEXT:    ptrue p1.d, vl2
+; CHECK-i64-NEXT:    ptrue p0.d, vl2
 ; CHECK-i64-NEXT:    // kill: def $q6 killed $q6 def $z6
 ; CHECK-i64-NEXT:    // kill: def $q4 killed $q4 def $z4
+; CHECK-i64-NEXT:    // kill: def $q2 killed $q2 def $z2
 ; CHECK-i64-NEXT:    // kill: def $q7 killed $q7 def $z7
 ; CHECK-i64-NEXT:    // kill: def $q5 killed $q5 def $z5
-; CHECK-i64-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-i64-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-i64-NEXT:    // kill: def $q3 killed $q3 def $z3
+; CHECK-i64-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-i64-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-i64-NEXT:    splice z6.d, p0, z6.d, z7.d
+; CHECK-i64-NEXT:    splice z2.d, p0, z2.d, z3.d
+; CHECK-i64-NEXT:    splice z4.d, p0, z4.d, z5.d
+; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-i64-NEXT:    ptrue p0.d, vl4
-; CHECK-i64-NEXT:    splice z6.d, p1, z6.d, z7.d
-; CHECK-i64-NEXT:    splice z4.d, p1, z4.d, z5.d
-; CHECK-i64-NEXT:    splice z2.d, p1, z2.d, z3.d
-; CHECK-i64-NEXT:    splice z0.d, p1, z0.d, z1.d
-; CHECK-i64-NEXT:    movprfx z3, z6
-; CHECK-i64-NEXT:    frintx z3.d, p0/m, z6.d
-; CHECK-i64-NEXT:    movprfx z1, z4
-; CHECK-i64-NEXT:    frintx z1.d, p0/m, z4.d
+; CHECK-i64-NEXT:    frintx z6.d, p0/m, z6.d
+; CHECK-i64-NEXT:    frintx z4.d, p0/m, z4.d
 ; CHECK-i64-NEXT:    frintx z2.d, p0/m, z2.d
 ; CHECK-i64-NEXT:    frintx z0.d, p0/m, z0.d
-; CHECK-i64-NEXT:    mov z4.d, z3.d[2]
-; CHECK-i64-NEXT:    mov z5.d, z1.d[2]
-; CHECK-i64-NEXT:    mov z6.d, z2.d[3]
-; CHECK-i64-NEXT:    fcvtzs x11, d0
-; CHECK-i64-NEXT:    fcvtzs x12, d1
-; CHECK-i64-NEXT:    fcvtzs x13, d2
-; CHECK-i64-NEXT:    fcvtzs x14, d3
-; CHECK-i64-NEXT:    mov z7.d, z3.d[3]
-; CHECK-i64-NEXT:    mov z16.d, z1.d[3]
-; CHECK-i64-NEXT:    fcvtzs x9, d4
+; CHECK-i64-NEXT:    mov z1.d, z6.d[3]
+; CHECK-i64-NEXT:    mov z3.d, z4.d[3]
+; CHECK-i64-NEXT:    mov z5.d, z2.d[3]
+; CHECK-i64-NEXT:    mov z16.d, z4.d[1]
+; CHECK-i64-NEXT:    mov z7.d, z0.d[3]
+; CHECK-i64-NEXT:    mov z17.d, z0.d[2]
+; CHECK-i64-NEXT:    mov z18.d, z4.d[2]
+; CHECK-i64-NEXT:    mov z19.d, z6.d[1]
+; CHECK-i64-NEXT:    fcvtzs d4, d4
+; CHECK-i64-NEXT:    fcvtzs x8, d1
+; CHECK-i64-NEXT:    mov z1.d, z2.d[1]
+; CHECK-i64-NEXT:    fcvtzs x9, d3
+; CHECK-i64-NEXT:    mov z3.d, z0.d[1]
 ; CHECK-i64-NEXT:    fcvtzs x10, d5
-; CHECK-i64-NEXT:    mov z4.d, z2.d[2]
-; CHECK-i64-NEXT:    mov z5.d, z0.d[2]
-; CHECK-i64-NEXT:    fcvtzs x8, d6
-; CHECK-i64-NEXT:    mov z2.d, z2.d[1]
-; CHECK-i64-NEXT:    mov z6.d, z0.d[3]
-; CHECK-i64-NEXT:    mov z1.d, z1.d[1]
-; CHECK-i64-NEXT:    mov z3.d, z3.d[1]
-; CHECK-i64-NEXT:    fcvtzs x15, d4
-; CHECK-i64-NEXT:    mov z4.d, z0.d[1]
-; CHECK-i64-NEXT:    fmov d0, x11
-; CHECK-i64-NEXT:    fcvtzs x16, d5
-; CHECK-i64-NEXT:    fcvtzs x11, d2
-; CHECK-i64-NEXT:    fmov d2, x13
-; CHECK-i64-NEXT:    fcvtzs x17, d7
-; CHECK-i64-NEXT:    fcvtzs x18, d16
-; CHECK-i64-NEXT:    fcvtzs x0, d3
-; CHECK-i64-NEXT:    fcvtzs x13, d4
-; CHECK-i64-NEXT:    fmov d4, x12
-; CHECK-i64-NEXT:    fcvtzs x12, d6
-; CHECK-i64-NEXT:    fmov d6, x14
-; CHECK-i64-NEXT:    fcvtzs x14, d1
-; CHECK-i64-NEXT:    fmov d3, x15
-; CHECK-i64-NEXT:    fmov d1, x16
-; CHECK-i64-NEXT:    fmov d5, x10
-; CHECK-i64-NEXT:    fmov d7, x9
-; CHECK-i64-NEXT:    mov v2.d[1], x11
-; CHECK-i64-NEXT:    mov v0.d[1], x13
-; CHECK-i64-NEXT:    mov v3.d[1], x8
-; CHECK-i64-NEXT:    mov v6.d[1], x0
-; CHECK-i64-NEXT:    mov v4.d[1], x14
-; CHECK-i64-NEXT:    mov v1.d[1], x12
-; CHECK-i64-NEXT:    mov v5.d[1], x18
-; CHECK-i64-NEXT:    mov v7.d[1], x17
+; CHECK-i64-NEXT:    mov z5.d, z6.d[2]
+; CHECK-i64-NEXT:    fcvtzs x12, d16
+; CHECK-i64-NEXT:    mov z16.d, z2.d[2]
+; CHECK-i64-NEXT:    fcvtzs x11, d7
+; CHECK-i64-NEXT:    fcvtzs x13, d1
+; CHECK-i64-NEXT:    fcvtzs d1, d17
+; CHECK-i64-NEXT:    fcvtzs d0, d0
+; CHECK-i64-NEXT:    fcvtzs x14, d3
+; CHECK-i64-NEXT:    fcvtzs d7, d5
+; CHECK-i64-NEXT:    fcvtzs d2, d2
+; CHECK-i64-NEXT:    fcvtzs d3, d16
+; CHECK-i64-NEXT:    fcvtzs d5, d18
+; CHECK-i64-NEXT:    fcvtzs x15, d19
+; CHECK-i64-NEXT:    fcvtzs d6, d6
+; CHECK-i64-NEXT:    mov v4.d[1], x12
+; CHECK-i64-NEXT:    mov v1.d[1], x11
+; CHECK-i64-NEXT:    mov v0.d[1], x14
+; CHECK-i64-NEXT:    mov v2.d[1], x13
+; CHECK-i64-NEXT:    mov v7.d[1], x8
+; CHECK-i64-NEXT:    mov v3.d[1], x10
+; CHECK-i64-NEXT:    mov v5.d[1], x9
+; CHECK-i64-NEXT:    mov v6.d[1], x15
 ; CHECK-i64-NEXT:    ret
   %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double> %x)
   ret <16 x iXLen> %a
@@ -1723,13 +1681,13 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) nounwind {
 ; CHECK-i32-NEXT:    sub sp, sp, #176
 ; CHECK-i32-NEXT:    str q0, [sp, #96] // 16-byte Spill
 ; CHECK-i32-NEXT:    mov v0.16b, v7.16b
-; CHECK-i32-NEXT:    stp x30, x25, [sp, #112] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    str x30, [sp, #112] // 8-byte Spill
 ; CHECK-i32-NEXT:    stp x24, x23, [sp, #128] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    stp q6, q5, [sp] // 32-byte Folded Spill
-; CHECK-i32-NEXT:    stp q4, q3, [sp, #32] // 32-byte Folded Spill
-; CHECK-i32-NEXT:    stp q2, q1, [sp, #64] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    stp q3, q2, [sp, #32] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    stp q1, q4, [sp, #64] // 32-byte Folded Spill
 ; CHECK-i32-NEXT:    bl lrintl
 ; CHECK-i32-NEXT:    ldr q0, [sp] // 16-byte Reload
 ; CHECK-i32-NEXT:    mov w19, w0
@@ -1749,21 +1707,22 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) nounwind {
 ; CHECK-i32-NEXT:    ldr q0, [sp, #80] // 16-byte Reload
 ; CHECK-i32-NEXT:    mov w24, w0
 ; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s1, w0
 ; CHECK-i32-NEXT:    ldr q0, [sp, #96] // 16-byte Reload
-; CHECK-i32-NEXT:    mov w25, w0
+; CHECK-i32-NEXT:    str q1, [sp, #96] // 16-byte Spill
 ; CHECK-i32-NEXT:    bl lrintl
-; CHECK-i32-NEXT:    fmov s1, w22
 ; CHECK-i32-NEXT:    fmov s0, w0
-; CHECK-i32-NEXT:    mov v0.s[1], w25
+; CHECK-i32-NEXT:    ldr q1, [sp, #96] // 16-byte Reload
+; CHECK-i32-NEXT:    ldr x30, [sp, #112] // 8-byte Reload
 ; CHECK-i32-NEXT:    mov v1.s[1], w21
-; CHECK-i32-NEXT:    ldp x22, x21, [sp, #144] // 16-byte Folded Reload
-; CHECK-i32-NEXT:    ldp x30, x25, [sp, #112] // 16-byte Folded Reload
-; CHECK-i32-NEXT:    mov v0.s[2], w24
+; CHECK-i32-NEXT:    mov v0.s[1], w24
 ; CHECK-i32-NEXT:    mov v1.s[2], w20
-; CHECK-i32-NEXT:    mov v0.s[3], w23
+; CHECK-i32-NEXT:    mov v0.s[2], w23
+; CHECK-i32-NEXT:    ldp x24, x23, [sp, #128] // 16-byte Folded Reload
 ; CHECK-i32-NEXT:    mov v1.s[3], w19
 ; CHECK-i32-NEXT:    ldp x20, x19, [sp, #160] // 16-byte Folded Reload
-; CHECK-i32-NEXT:    ldp x24, x23, [sp, #128] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[3], w22
+; CHECK-i32-NEXT:    ldp x22, x21, [sp, #144] // 16-byte Folded Reload
 ; CHECK-i32-NEXT:    add sp, sp, #176
 ; CHECK-i32-NEXT:    ret
 ;
@@ -1847,11 +1806,10 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v16fp128:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    sub sp, sp, #368
-; CHECK-i32-NEXT:    stp q3, q0, [sp, #144] // 32-byte Folded Spill
-; CHECK-i32-NEXT:    stp q2, q1, [sp, #176] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    stp q2, q1, [sp, #128] // 32-byte Folded Spill
 ; CHECK-i32-NEXT:    ldr q1, [sp, #368]
 ; CHECK-i32-NEXT:    stp x29, x30, [sp, #272] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    str q1, [sp, #64] // 16-byte Spill
+; CHECK-i32-NEXT:    str q1, [sp, #160] // 16-byte Spill
 ; CHECK-i32-NEXT:    ldr q1, [sp, #384]
 ; CHECK-i32-NEXT:    stp x28, x27, [sp, #288] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    str q1, [sp, #48] // 16-byte Spill
@@ -1860,43 +1818,40 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) nounwind {
 ; CHECK-i32-NEXT:    str q1, [sp, #32] // 16-byte Spill
 ; CHECK-i32-NEXT:    ldr q1, [sp, #416]
 ; CHECK-i32-NEXT:    stp x24, x23, [sp, #320] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    str q1, [sp, #208] // 16-byte Spill
-; CHECK-i32-NEXT:    ldr q1, [sp, #432]
 ; CHECK-i32-NEXT:    stp x22, x21, [sp, #336] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    str q1, [sp, #16] // 16-byte Spill
-; CHECK-i32-NEXT:    ldr q1, [sp, #448]
 ; CHECK-i32-NEXT:    stp x20, x19, [sp, #352] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    str q1, [sp, #224] // 16-byte Spill
+; CHECK-i32-NEXT:    stp q7, q6, [sp, #64] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    stp q5, q3, [sp, #96] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    stp q0, q1, [sp, #208] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #432]
+; CHECK-i32-NEXT:    stp q1, q4, [sp, #176] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #448]
+; CHECK-i32-NEXT:    str q1, [sp, #16] // 16-byte Spill
 ; CHECK-i32-NEXT:    ldr q1, [sp, #464]
-; CHECK-i32-NEXT:    stp q7, q6, [sp, #80] // 32-byte Folded Spill
 ; CHECK-i32-NEXT:    str q1, [sp, #240] // 16-byte Spill
 ; CHECK-i32-NEXT:    ldr q1, [sp, #480]
-; CHECK-i32-NEXT:    stp q5, q4, [sp, #112] // 32-byte Folded Spill
 ; CHECK-i32-NEXT:    mov v0.16b, v1.16b
 ; CHECK-i32-NEXT:    bl lrintl
 ; CHECK-i32-NEXT:    ldr q0, [sp, #240] // 16-byte Reload
 ; CHECK-i32-NEXT:    str w0, [sp, #268] // 4-byte Spill
 ; CHECK-i32-NEXT:    bl lrintl
-; CHECK-i32-NEXT:    ldr q0, [sp, #224] // 16-byte Reload
-; CHECK-i32-NEXT:    str w0, [sp, #240] // 4-byte Spill
-; CHECK-i32-NEXT:    bl lrintl
 ; CHECK-i32-NEXT:    ldr q0, [sp, #16] // 16-byte Reload
-; CHECK-i32-NEXT:    str w0, [sp, #224] // 4-byte Spill
+; CHECK-i32-NEXT:    str w0, [sp, #240] // 4-byte Spill
 ; CHECK-i32-NEXT:    bl lrintl
-; CHECK-i32-NEXT:    ldr q0, [sp, #208] // 16-byte Reload
-; CHECK-i32-NEXT:    mov w23, w0
+; CHECK-i32-NEXT:    ldr q0, [sp, #224] // 16-byte Reload
+; CHECK-i32-NEXT:    mov w22, w0
 ; CHECK-i32-NEXT:    bl lrintl
 ; CHECK-i32-NEXT:    ldr q0, [sp, #32] // 16-byte Reload
-; CHECK-i32-NEXT:    str w0, [sp, #208] // 4-byte Spill
+; CHECK-i32-NEXT:    str w0, [sp, #224] // 4-byte Spill
 ; CHECK-i32-NEXT:    bl lrintl
 ; CHECK-i32-NEXT:    ldr q0, [sp, #48] // 16-byte Reload
-; CHECK-i32-NEXT:    mov w24, w0
+; CHECK-i32-NEXT:    mov w23, w0
 ; CHECK-i32-NEXT:    bl lrintl
 ; CHECK-i32-NEXT:    ldr q0, [sp, #64] // 16-byte Reload
 ; CHECK-i32-NEXT:    mov w25, w0
 ; CHECK-i32-NEXT:    bl lrintl
 ; CHECK-i32-NEXT:    ldr q0, [sp, #80] // 16-byte Reload
-; CHECK-i32-NEXT:    mov w27, w0
+; CHECK-i32-NEXT:    mov w24, w0
 ; CHECK-i32-NEXT:    bl lrintl
 ; CHECK-i32-NEXT:    ldr q0, [sp, #96] // 16-byte Reload
 ; CHECK-i32-NEXT:    mov w26, w0
@@ -1905,46 +1860,52 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) nounwind {
 ; CHECK-i32-NEXT:    mov w28, w0
 ; CHECK-i32-NEXT:    bl lrintl
 ; CHECK-i32-NEXT:    ldr q0, [sp, #128] // 16-byte Reload
-; CHECK-i32-NEXT:    mov w29, w0
+; CHECK-i32-NEXT:    mov w27, w0
 ; CHECK-i32-NEXT:    bl lrintl
 ; CHECK-i32-NEXT:    ldr q0, [sp, #144] // 16-byte Reload
+; CHECK-i32-NEXT:    mov w29, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #192] // 16-byte Reload
 ; CHECK-i32-NEXT:    mov w19, w0
 ; CHECK-i32-NEXT:    bl lrintl
-; CHECK-i32-NEXT:    ldr q0, [sp, #176] // 16-byte Reload
+; CHECK-i32-NEXT:    ldr q0, [sp, #160] // 16-byte Reload
 ; CHECK-i32-NEXT:    mov w20, w0
 ; CHECK-i32-NEXT:    bl lrintl
-; CHECK-i32-NEXT:    ldr q0, [sp, #192] // 16-byte Reload
+; CHECK-i32-NEXT:    ldr q0, [sp, #176] // 16-byte Reload
 ; CHECK-i32-NEXT:    mov w21, w0
 ; CHECK-i32-NEXT:    bl lrintl
-; CHECK-i32-NEXT:    ldr q0, [sp, #160] // 16-byte Reload
-; CHECK-i32-NEXT:    mov w22, w0
+; CHECK-i32-NEXT:    fmov s1, w0
+; CHECK-i32-NEXT:    ldr q0, [sp, #208] // 16-byte Reload
+; CHECK-i32-NEXT:    str q1, [sp, #208] // 16-byte Spill
+; CHECK-i32-NEXT:    fmov s1, w21
+; CHECK-i32-NEXT:    str q1, [sp, #192] // 16-byte Spill
+; CHECK-i32-NEXT:    fmov s1, w20
+; CHECK-i32-NEXT:    str q1, [sp, #176] // 16-byte Spill
 ; CHECK-i32-NEXT:    bl lrintl
-; CHECK-i32-NEXT:    fmov s1, w19
 ; CHECK-i32-NEXT:    fmov s0, w0
-; CHECK-i32-NEXT:    ldr w8, [sp, #224] // 4-byte Reload
-; CHECK-i32-NEXT:    fmov s2, w27
-; CHECK-i32-NEXT:    fmov s3, w23
-; CHECK-i32-NEXT:    mov v0.s[1], w22
-; CHECK-i32-NEXT:    mov v1.s[1], w29
-; CHECK-i32-NEXT:    mov v2.s[1], w25
-; CHECK-i32-NEXT:    mov v3.s[1], w8
+; CHECK-i32-NEXT:    ldp q1, q2, [sp, #176] // 32-byte Folded Reload
+; CHECK-i32-NEXT:    ldr q3, [sp, #208] // 16-byte Reload
 ; CHECK-i32-NEXT:    ldr w8, [sp, #240] // 4-byte Reload
-; CHECK-i32-NEXT:    ldp x29, x30, [sp, #272] // 16-byte Folded Reload
-; CHECK-i32-NEXT:    mov v0.s[2], w21
-; CHECK-i32-NEXT:    mov v1.s[2], w28
-; CHECK-i32-NEXT:    mov v2.s[2], w24
-; CHECK-i32-NEXT:    mov v3.s[2], w8
-; CHECK-i32-NEXT:    ldr w8, [sp, #208] // 4-byte Reload
+; CHECK-i32-NEXT:    mov v0.s[1], w19
+; CHECK-i32-NEXT:    mov v1.s[1], w28
+; CHECK-i32-NEXT:    mov v2.s[1], w25
+; CHECK-i32-NEXT:    mov v3.s[1], w22
+; CHECK-i32-NEXT:    ldp x20, x19, [sp, #352] // 16-byte Folded Reload
 ; CHECK-i32-NEXT:    ldp x22, x21, [sp, #336] // 16-byte Folded Reload
-; CHECK-i32-NEXT:    ldp x24, x23, [sp, #320] // 16-byte Folded Reload
-; CHECK-i32-NEXT:    mov v0.s[3], w20
-; CHECK-i32-NEXT:    mov v1.s[3], w26
+; CHECK-i32-NEXT:    mov v0.s[2], w29
+; CHECK-i32-NEXT:    mov v1.s[2], w26
+; CHECK-i32-NEXT:    mov v2.s[2], w23
+; CHECK-i32-NEXT:    mov v3.s[2], w8
+; CHECK-i32-NEXT:    ldr w8, [sp, #224] // 4-byte Reload
+; CHECK-i32-NEXT:    ldp x26, x25, [sp, #304] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    ldp x29, x30, [sp, #272] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[3], w27
+; CHECK-i32-NEXT:    mov v1.s[3], w24
 ; CHECK-i32-NEXT:    mov v2.s[3], w8
 ; CHECK-i32-NEXT:    ldr w8, [sp, #268] // 4-byte Reload
-; CHECK-i32-NEXT:    ldp x20, x19, [sp, #352] // 16-byte Folded Reload
-; CHECK-i32-NEXT:    ldp x26, x25, [sp, #304] // 16-byte Folded Reload
-; CHECK-i32-NEXT:    mov v3.s[3], w8
+; CHECK-i32-NEXT:    ldp x24, x23, [sp, #320] // 16-byte Folded Reload
 ; CHECK-i32-NEXT:    ldp x28, x27, [sp, #288] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v3.s[3], w8
 ; CHECK-i32-NEXT:    add sp, sp, #368
 ; CHECK-i32-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll
index ae7617d9c0b66..d9a9e57fe0a63 100644
--- a/llvm/test/CodeGen/AArch64/vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll
@@ -806,18 +806,11 @@ define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) nounwind {
 declare <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float>)
 
 define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) nounwind {
-; CHECK-SD-LABEL: llrint_v1i64_v1f64:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    frintx d0, d0
-; CHECK-SD-NEXT:    fcvtzs x8, d0
-; CHECK-SD-NEXT:    fmov d0, x8
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: llrint_v1i64_v1f64:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    frintx d0, d0
-; CHECK-GI-NEXT:    fcvtzs d0, d0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: llrint_v1i64_v1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx d0, d0
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
   %a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x)
   ret <1 x i64> %a
 }
diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll
index 9eaad687fb4a2..2abe0b7ae2106 100644
--- a/llvm/test/CodeGen/AArch64/vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll
@@ -11,13 +11,12 @@
 ; RUN:   FileCheck %s --check-prefixes=CHECK-i64,CHECK-i64-GI
 
 define <1 x iXLen> @lrint_v1f16(<1 x half> %x) nounwind {
-; CHECK-i32-SD-LABEL: lrint_v1f16:
-; CHECK-i32-SD:       // %bb.0:
-; CHECK-i32-SD-NEXT:    fcvt s0, h0
-; CHECK-i32-SD-NEXT:    frintx s0, s0
-; CHECK-i32-SD-NEXT:    fcvtzs w8, s0
-; CHECK-i32-SD-NEXT:    fmov s0, w8
-; CHECK-i32-SD-NEXT:    ret
+; CHECK-i32-LABEL: lrint_v1f16:
+; CHECK-i32:       // %bb.0:
+; CHECK-i32-NEXT:    fcvt s0, h0
+; CHECK-i32-NEXT:    frintx s0, s0
+; CHECK-i32-NEXT:    fcvtzs s0, s0
+; CHECK-i32-NEXT:    ret
 ;
 ; CHECK-i64-LABEL: lrint_v1f16:
 ; CHECK-i64:       // %bb.0:
@@ -26,13 +25,6 @@ define <1 x iXLen> @lrint_v1f16(<1 x half> %x) nounwind {
 ; CHECK-i64-NEXT:    fcvtzs x8, s0
 ; CHECK-i64-NEXT:    fmov d0, x8
 ; CHECK-i64-NEXT:    ret
-;
-; CHECK-i32-GI-LABEL: lrint_v1f16:
-; CHECK-i32-GI:       // %bb.0:
-; CHECK-i32-GI-NEXT:    fcvt s0, h0
-; CHECK-i32-GI-NEXT:    frintx s0, s0
-; CHECK-i32-GI-NEXT:    fcvtzs s0, s0
-; CHECK-i32-GI-NEXT:    ret
   %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
   ret <1 x iXLen> %a
 }
@@ -47,10 +39,9 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) nounwind {
 ; CHECK-i32-SD-NEXT:    fcvt s1, h1
 ; CHECK-i32-SD-NEXT:    frintx s0, s0
 ; CHECK-i32-SD-NEXT:    frintx s1, s1
-; CHECK-i32-SD-NEXT:    fcvtzs w8, s0
-; CHECK-i32-SD-NEXT:    fcvtzs w9, s1
-; CHECK-i32-SD-NEXT:    fmov s0, w8
-; CHECK-i32-SD-NEXT:    mov v0.s[1], w9
+; CHECK-i32-SD-NEXT:    fcvtzs s0, s0
+; CHECK-i32-SD-NEXT:    fcvtzs w8, s1
+; CHECK-i32-SD-NEXT:    mov v0.s[1], w8
 ; CHECK-i32-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-i32-SD-NEXT:    ret
 ;
@@ -98,17 +89,16 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) nounwind {
 ; CHECK-i32-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-i32-SD-NEXT:    fcvt s2, h0
 ; CHECK-i32-SD-NEXT:    mov h3, v0.h[2]
-; CHECK-i32-SD-NEXT:    mov h0, v0.h[3]
+; CHECK-i32-SD-NEXT:    mov h4, v0.h[3]
 ; CHECK-i32-SD-NEXT:    fcvt s1, h1
-; CHECK-i32-SD-NEXT:    frintx s2, s2
-; CHECK-i32-SD-NEXT:    fcvt s3, h3
+; CHECK-i32-SD-NEXT:    frintx s0, s2
+; CHECK-i32-SD-NEXT:    fcvt s2, h3
 ; CHECK-i32-SD-NEXT:    frintx s1, s1
-; CHECK-i32-SD-NEXT:    fcvtzs w8, s2
-; CHECK-i32-SD-NEXT:    fcvt s2, h0
-; CHECK-i32-SD-NEXT:    fcvtzs w9, s1
-; CHECK-i32-SD-NEXT:    frintx s1, s3
-; CHECK-i32-SD-NEXT:    fmov s0, w8
-; CHECK-i32-SD-NEXT:    mov v0.s[1], w9
+; CHECK-i32-SD-NEXT:    fcvtzs s0, s0
+; CHECK-i32-SD-NEXT:    fcvtzs w8, s1
+; CHECK-i32-SD-NEXT:    frintx s1, s2
+; CHECK-i32-SD-NEXT:    fcvt s2, h4
+; CHECK-i32-SD-NEXT:    mov v0.s[1], w8
 ; CHECK-i32-SD-NEXT:    fcvtzs w8, s1
 ; CHECK-i32-SD-NEXT:    frintx s1, s2
 ; CHECK-i32-SD-NEXT:    mov v0.s[2], w8
@@ -169,41 +159,39 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) nounwind {
 ; CHECK-i32-SD-LABEL: lrint_v8f16:
 ; CHECK-i32-SD:       // %bb.0:
 ; CHECK-i32-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-i32-SD-NEXT:    mov h3, v0.h[1]
-; CHECK-i32-SD-NEXT:    fcvt s6, h0
+; CHECK-i32-SD-NEXT:    mov h2, v0.h[1]
 ; CHECK-i32-SD-NEXT:    mov h4, v0.h[2]
+; CHECK-i32-SD-NEXT:    fcvt s7, h0
 ; CHECK-i32-SD-NEXT:    mov h0, v0.h[3]
-; CHECK-i32-SD-NEXT:    mov h2, v1.h[1]
+; CHECK-i32-SD-NEXT:    mov h3, v1.h[1]
+; CHECK-i32-SD-NEXT:    fcvt s2, h2
 ; CHECK-i32-SD-NEXT:    fcvt s5, h1
-; CHECK-i32-SD-NEXT:    mov h7, v1.h[2]
-; CHECK-i32-SD-NEXT:    fcvt s3, h3
-; CHECK-i32-SD-NEXT:    frintx s6, s6
+; CHECK-i32-SD-NEXT:    mov h6, v1.h[2]
 ; CHECK-i32-SD-NEXT:    fcvt s4, h4
-; CHECK-i32-SD-NEXT:    mov h1, v1.h[3]
-; CHECK-i32-SD-NEXT:    fcvt s2, h2
+; CHECK-i32-SD-NEXT:    mov h16, v1.h[3]
+; CHECK-i32-SD-NEXT:    frintx s7, s7
+; CHECK-i32-SD-NEXT:    fcvt s3, h3
+; CHECK-i32-SD-NEXT:    frintx s2, s2
 ; CHECK-i32-SD-NEXT:    frintx s5, s5
-; CHECK-i32-SD-NEXT:    fcvt s7, h7
+; CHECK-i32-SD-NEXT:    fcvt s6, h6
 ; CHECK-i32-SD-NEXT:    frintx s3, s3
-; CHECK-i32-SD-NEXT:    fcvtzs w9, s6
-; CHECK-i32-SD-NEXT:    frintx s4, s4
-; CHECK-i32-SD-NEXT:    frintx s2, s2
-; CHECK-i32-SD-NEXT:    fcvtzs w8, s5
-; CHECK-i32-SD-NEXT:    fcvt s5, h1
-; CHECK-i32-SD-NEXT:    fcvtzs w11, s3
-; CHECK-i32-SD-NEXT:    fcvt s3, h0
-; CHECK-i32-SD-NEXT:    fmov s0, w9
-; CHECK-i32-SD-NEXT:    fcvtzs w12, s4
-; CHECK-i32-SD-NEXT:    fcvtzs w10, s2
-; CHECK-i32-SD-NEXT:    frintx s2, s7
-; CHECK-i32-SD-NEXT:    fmov s1, w8
-; CHECK-i32-SD-NEXT:    mov v0.s[1], w11
 ; CHECK-i32-SD-NEXT:    fcvtzs w8, s2
-; CHECK-i32-SD-NEXT:    mov v1.s[1], w10
-; CHECK-i32-SD-NEXT:    frintx s2, s3
+; CHECK-i32-SD-NEXT:    frintx s2, s4
+; CHECK-i32-SD-NEXT:    fcvtzs s1, s5
+; CHECK-i32-SD-NEXT:    fcvt s4, h0
+; CHECK-i32-SD-NEXT:    fcvt s5, h16
+; CHECK-i32-SD-NEXT:    fcvtzs s0, s7
+; CHECK-i32-SD-NEXT:    fcvtzs w9, s3
+; CHECK-i32-SD-NEXT:    frintx s3, s6
+; CHECK-i32-SD-NEXT:    fcvtzs w10, s2
+; CHECK-i32-SD-NEXT:    frintx s2, s4
+; CHECK-i32-SD-NEXT:    mov v0.s[1], w8
+; CHECK-i32-SD-NEXT:    fcvtzs w11, s3
+; CHECK-i32-SD-NEXT:    mov v1.s[1], w9
 ; CHECK-i32-SD-NEXT:    frintx s3, s5
-; CHECK-i32-SD-NEXT:    mov v0.s[2], w12
-; CHECK-i32-SD-NEXT:    mov v1.s[2], w8
 ; CHECK-i32-SD-NEXT:    fcvtzs w9, s2
+; CHECK-i32-SD-NEXT:    mov v0.s[2], w10
+; CHECK-i32-SD-NEXT:    mov v1.s[2], w11
 ; CHECK-i32-SD-NEXT:    fcvtzs w8, s3
 ; CHECK-i32-SD-NEXT:    mov v0.s[3], w9
 ; CHECK-i32-SD-NEXT:    mov v1.s[3], w8
@@ -295,82 +283,78 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) nounwind {
 ; CHECK-i32-SD:       // %bb.0:
 ; CHECK-i32-SD-NEXT:    ext v2.16b, v0.16b, v0.16b, #8
 ; CHECK-i32-SD-NEXT:    ext v3.16b, v1.16b, v1.16b, #8
-; CHECK-i32-SD-NEXT:    mov h18, v0.h[1]
-; CHECK-i32-SD-NEXT:    mov h19, v1.h[1]
-; CHECK-i32-SD-NEXT:    fcvt s20, h0
-; CHECK-i32-SD-NEXT:    mov h21, v0.h[2]
+; CHECK-i32-SD-NEXT:    mov h4, v0.h[1]
+; CHECK-i32-SD-NEXT:    mov h5, v1.h[1]
+; CHECK-i32-SD-NEXT:    mov h16, v0.h[2]
+; CHECK-i32-SD-NEXT:    fcvt s19, h0
+; CHECK-i32-SD-NEXT:    mov h20, v1.h[2]
 ; CHECK-i32-SD-NEXT:    mov h0, v0.h[3]
-; CHECK-i32-SD-NEXT:    mov h4, v2.h[1]
-; CHECK-i32-SD-NEXT:    mov h5, v2.h[2]
-; CHECK-i32-SD-NEXT:    fcvt s6, h2
-; CHECK-i32-SD-NEXT:    fcvt s7, h3
-; CHECK-i32-SD-NEXT:    mov h16, v3.h[1]
-; CHECK-i32-SD-NEXT:    mov h17, v3.h[2]
-; CHECK-i32-SD-NEXT:    fcvt s18, h18
-; CHECK-i32-SD-NEXT:    fcvt s19, h19
-; CHECK-i32-SD-NEXT:    mov h2, v2.h[3]
+; CHECK-i32-SD-NEXT:    mov h21, v1.h[3]
+; CHECK-i32-SD-NEXT:    mov h6, v2.h[1]
+; CHECK-i32-SD-NEXT:    mov h7, v3.h[1]
 ; CHECK-i32-SD-NEXT:    fcvt s4, h4
 ; CHECK-i32-SD-NEXT:    fcvt s5, h5
+; CHECK-i32-SD-NEXT:    mov h17, v2.h[2]
+; CHECK-i32-SD-NEXT:    mov h18, v3.h[2]
+; CHECK-i32-SD-NEXT:    frintx s19, s19
+; CHECK-i32-SD-NEXT:    fcvt s23, h0
+; CHECK-i32-SD-NEXT:    fcvt s6, h6
+; CHECK-i32-SD-NEXT:    fcvt s7, h7
+; CHECK-i32-SD-NEXT:    frintx s4, s4
+; CHECK-i32-SD-NEXT:    frintx s5, s5
+; CHECK-i32-SD-NEXT:    fcvtzs s0, s19
 ; CHECK-i32-SD-NEXT:    frintx s6, s6
 ; CHECK-i32-SD-NEXT:    frintx s7, s7
-; CHECK-i32-SD-NEXT:    fcvt s16, h16
-; CHECK-i32-SD-NEXT:    fcvt s17, h17
+; CHECK-i32-SD-NEXT:    fcvtzs w8, s4
+; CHECK-i32-SD-NEXT:    fcvt s4, h2
+; CHECK-i32-SD-NEXT:    fcvtzs w9, s5
+; CHECK-i32-SD-NEXT:    fcvt s5, h3
+; CHECK-i32-SD-NEXT:    mov h2, v2.h[3]
+; CHECK-i32-SD-NEXT:    fcvtzs w10, s6
+; CHECK-i32-SD-NEXT:    fcvt s6, h1
+; CHECK-i32-SD-NEXT:    fcvtzs w11, s7
+; CHECK-i32-SD-NEXT:    fcvt s7, h16
+; CHECK-i32-SD-NEXT:    fcvt s16, h17
+; CHECK-i32-SD-NEXT:    fcvt s17, h18
+; CHECK-i32-SD-NEXT:    fcvt s18, h20
+; CHECK-i32-SD-NEXT:    frintx s4, s4
+; CHECK-i32-SD-NEXT:    frintx s5, s5
+; CHECK-i32-SD-NEXT:    mov h20, v3.h[3]
+; CHECK-i32-SD-NEXT:    fcvt s22, h2
+; CHECK-i32-SD-NEXT:    mov v0.s[1], w8
+; CHECK-i32-SD-NEXT:    frintx s6, s6
+; CHECK-i32-SD-NEXT:    frintx s16, s16
+; CHECK-i32-SD-NEXT:    frintx s17, s17
+; CHECK-i32-SD-NEXT:    frintx s7, s7
 ; CHECK-i32-SD-NEXT:    frintx s18, s18
-; CHECK-i32-SD-NEXT:    fcvt s2, h2
+; CHECK-i32-SD-NEXT:    fcvtzs s1, s4
+; CHECK-i32-SD-NEXT:    fcvtzs s3, s5
+; CHECK-i32-SD-NEXT:    fcvt s4, h20
+; CHECK-i32-SD-NEXT:    fcvt s5, h21
+; CHECK-i32-SD-NEXT:    fcvtzs s2, s6
+; CHECK-i32-SD-NEXT:    frintx s6, s22
+; CHECK-i32-SD-NEXT:    fcvtzs w12, s16
+; CHECK-i32-SD-NEXT:    fcvtzs w13, s17
+; CHECK-i32-SD-NEXT:    fcvtzs w14, s7
+; CHECK-i32-SD-NEXT:    fcvtzs w15, s18
+; CHECK-i32-SD-NEXT:    frintx s7, s23
+; CHECK-i32-SD-NEXT:    mov v1.s[1], w10
+; CHECK-i32-SD-NEXT:    mov v3.s[1], w11
 ; CHECK-i32-SD-NEXT:    frintx s4, s4
 ; CHECK-i32-SD-NEXT:    frintx s5, s5
+; CHECK-i32-SD-NEXT:    mov v2.s[1], w9
 ; CHECK-i32-SD-NEXT:    fcvtzs w8, s6
-; CHECK-i32-SD-NEXT:    fcvt s6, h1
+; CHECK-i32-SD-NEXT:    mov v0.s[2], w14
 ; CHECK-i32-SD-NEXT:    fcvtzs w9, s7
-; CHECK-i32-SD-NEXT:    mov h7, v1.h[2]
-; CHECK-i32-SD-NEXT:    frintx s16, s16
-; CHECK-i32-SD-NEXT:    fcvtzs w15, s18
+; CHECK-i32-SD-NEXT:    mov v1.s[2], w12
+; CHECK-i32-SD-NEXT:    mov v3.s[2], w13
 ; CHECK-i32-SD-NEXT:    fcvtzs w10, s4
-; CHECK-i32-SD-NEXT:    frintx s4, s17
 ; CHECK-i32-SD-NEXT:    fcvtzs w11, s5
-; CHECK-i32-SD-NEXT:    frintx s5, s20
-; CHECK-i32-SD-NEXT:    fcvt s17, h21
-; CHECK-i32-SD-NEXT:    frintx s6, s6
-; CHECK-i32-SD-NEXT:    fcvtzs w12, s16
-; CHECK-i32-SD-NEXT:    frintx s16, s19
-; CHECK-i32-SD-NEXT:    fcvt s7, h7
-; CHECK-i32-SD-NEXT:    mov h19, v1.h[3]
-; CHECK-i32-SD-NEXT:    fmov s1, w8
-; CHECK-i32-SD-NEXT:    fcvtzs w13, s4
-; CHECK-i32-SD-NEXT:    mov h4, v3.h[3]
-; CHECK-i32-SD-NEXT:    fmov s3, w9
-; CHECK-i32-SD-NEXT:    fcvtzs w14, s5
-; CHECK-i32-SD-NEXT:    frintx s5, s17
-; CHECK-i32-SD-NEXT:    fcvtzs w16, s6
-; CHECK-i32-SD-NEXT:    fcvt s17, h0
-; CHECK-i32-SD-NEXT:    fcvtzs w8, s16
-; CHECK-i32-SD-NEXT:    frintx s6, s7
-; CHECK-i32-SD-NEXT:    fcvt s7, h19
-; CHECK-i32-SD-NEXT:    mov v1.s[1], w10
-; CHECK-i32-SD-NEXT:    mov v3.s[1], w12
-; CHECK-i32-SD-NEXT:    fcvt s4, h4
-; CHECK-i32-SD-NEXT:    fcvtzs w9, s5
-; CHECK-i32-SD-NEXT:    fmov s0, w14
-; CHECK-i32-SD-NEXT:    frintx s5, s2
-; CHECK-i32-SD-NEXT:    fmov s2, w16
-; CHECK-i32-SD-NEXT:    frintx s16, s17
-; CHECK-i32-SD-NEXT:    fcvtzs w10, s6
-; CHECK-i32-SD-NEXT:    frintx s6, s7
-; CHECK-i32-SD-NEXT:    mov v1.s[2], w11
-; CHECK-i32-SD-NEXT:    mov v3.s[2], w13
-; CHECK-i32-SD-NEXT:    mov v0.s[1], w15
-; CHECK-i32-SD-NEXT:    frintx s4, s4
-; CHECK-i32-SD-NEXT:    mov v2.s[1], w8
-; CHECK-i32-SD-NEXT:    fcvtzs w8, s5
-; CHECK-i32-SD-NEXT:    fcvtzs w12, s16
-; CHECK-i32-SD-NEXT:    mov v0.s[2], w9
-; CHECK-i32-SD-NEXT:    fcvtzs w9, s4
-; CHECK-i32-SD-NEXT:    mov v2.s[2], w10
-; CHECK-i32-SD-NEXT:    fcvtzs w10, s6
+; CHECK-i32-SD-NEXT:    mov v2.s[2], w15
+; CHECK-i32-SD-NEXT:    mov v0.s[3], w9
 ; CHECK-i32-SD-NEXT:    mov v1.s[3], w8
-; CHECK-i32-SD-NEXT:    mov v0.s[3], w12
-; CHECK-i32-SD-NEXT:    mov v3.s[3], w9
-; CHECK-i32-SD-NEXT:    mov v2.s[3], w10
+; CHECK-i32-SD-NEXT:    mov v3.s[3], w10
+; CHECK-i32-SD-NEXT:    mov v2.s[3], w11
 ; CHECK-i32-SD-NEXT:    ret
 ;
 ; CHECK-i64-SD-LABEL: lrint_v16f16:
@@ -522,164 +506,156 @@ declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>)
 define <32 x iXLen> @lrint_v32f16(<32 x half> %x) nounwind {
 ; CHECK-i32-SD-LABEL: lrint_v32f16:
 ; CHECK-i32-SD:       // %bb.0:
-; CHECK-i32-SD-NEXT:    ext v5.16b, v0.16b, v0.16b, #8
-; CHECK-i32-SD-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
-; CHECK-i32-SD-NEXT:    ext v17.16b, v2.16b, v2.16b, #8
-; CHECK-i32-SD-NEXT:    mov h6, v5.h[1]
-; CHECK-i32-SD-NEXT:    fcvt s7, h5
-; CHECK-i32-SD-NEXT:    mov h16, v5.h[2]
-; CHECK-i32-SD-NEXT:    mov h5, v5.h[3]
-; CHECK-i32-SD-NEXT:    mov h18, v4.h[1]
-; CHECK-i32-SD-NEXT:    mov h20, v4.h[3]
-; CHECK-i32-SD-NEXT:    mov h19, v4.h[2]
-; CHECK-i32-SD-NEXT:    fcvt s21, h4
-; CHECK-i32-SD-NEXT:    mov h23, v17.h[1]
-; CHECK-i32-SD-NEXT:    ext v4.16b, v3.16b, v3.16b, #8
-; CHECK-i32-SD-NEXT:    fcvt s22, h17
-; CHECK-i32-SD-NEXT:    fcvt s6, h6
-; CHECK-i32-SD-NEXT:    frintx s7, s7
-; CHECK-i32-SD-NEXT:    fcvt s16, h16
+; CHECK-i32-SD-NEXT:    str x19, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-i32-SD-NEXT:    ext v4.16b, v0.16b, v0.16b, #8
+; CHECK-i32-SD-NEXT:    ext v5.16b, v1.16b, v1.16b, #8
+; CHECK-i32-SD-NEXT:    ext v6.16b, v2.16b, v2.16b, #8
+; CHECK-i32-SD-NEXT:    mov h27, v3.h[2]
+; CHECK-i32-SD-NEXT:    mov h16, v4.h[2]
+; CHECK-i32-SD-NEXT:    mov h17, v4.h[3]
+; CHECK-i32-SD-NEXT:    mov h18, v5.h[1]
+; CHECK-i32-SD-NEXT:    mov h7, v4.h[1]
+; CHECK-i32-SD-NEXT:    mov h19, v5.h[2]
+; CHECK-i32-SD-NEXT:    mov h20, v5.h[3]
+; CHECK-i32-SD-NEXT:    mov h21, v6.h[1]
+; CHECK-i32-SD-NEXT:    mov h22, v6.h[2]
+; CHECK-i32-SD-NEXT:    fcvt s4, h4
 ; CHECK-i32-SD-NEXT:    fcvt s5, h5
+; CHECK-i32-SD-NEXT:    fcvt s16, h16
+; CHECK-i32-SD-NEXT:    fcvt s17, h17
 ; CHECK-i32-SD-NEXT:    fcvt s18, h18
-; CHECK-i32-SD-NEXT:    fcvt s20, h20
+; CHECK-i32-SD-NEXT:    fcvt s23, h7
 ; CHECK-i32-SD-NEXT:    fcvt s19, h19
-; CHECK-i32-SD-NEXT:    frintx s22, s22
-; CHECK-i32-SD-NEXT:    frintx s6, s6
-; CHECK-i32-SD-NEXT:    fcvtzs w12, s7
-; CHECK-i32-SD-NEXT:    frintx s7, s16
+; CHECK-i32-SD-NEXT:    ext v7.16b, v3.16b, v3.16b, #8
+; CHECK-i32-SD-NEXT:    fcvt s20, h20
+; CHECK-i32-SD-NEXT:    fcvt s21, h21
+; CHECK-i32-SD-NEXT:    fcvt s22, h22
+; CHECK-i32-SD-NEXT:    frintx s4, s4
 ; CHECK-i32-SD-NEXT:    frintx s5, s5
-; CHECK-i32-SD-NEXT:    frintx s16, s21
-; CHECK-i32-SD-NEXT:    fcvt s21, h23
+; CHECK-i32-SD-NEXT:    frintx s24, s16
+; CHECK-i32-SD-NEXT:    frintx s17, s17
 ; CHECK-i32-SD-NEXT:    frintx s18, s18
-; CHECK-i32-SD-NEXT:    frintx s20, s20
+; CHECK-i32-SD-NEXT:    mov h16, v6.h[3]
 ; CHECK-i32-SD-NEXT:    frintx s19, s19
-; CHECK-i32-SD-NEXT:    fcvtzs w15, s22
-; CHECK-i32-SD-NEXT:    mov h22, v1.h[2]
-; CHECK-i32-SD-NEXT:    fcvtzs w17, s6
-; CHECK-i32-SD-NEXT:    mov h6, v17.h[2]
-; CHECK-i32-SD-NEXT:    mov h17, v17.h[3]
-; CHECK-i32-SD-NEXT:    fcvtzs w9, s7
-; CHECK-i32-SD-NEXT:    mov h7, v4.h[2]
-; CHECK-i32-SD-NEXT:    fcvtzs w8, s5
-; CHECK-i32-SD-NEXT:    mov h5, v4.h[1]
-; CHECK-i32-SD-NEXT:    fcvtzs w13, s16
-; CHECK-i32-SD-NEXT:    frintx s16, s21
-; CHECK-i32-SD-NEXT:    fcvtzs w14, s18
-; CHECK-i32-SD-NEXT:    fcvtzs w10, s20
-; CHECK-i32-SD-NEXT:    fcvt s18, h4
+; CHECK-i32-SD-NEXT:    mov h25, v7.h[1]
+; CHECK-i32-SD-NEXT:    mov h26, v7.h[2]
+; CHECK-i32-SD-NEXT:    frintx s20, s20
+; CHECK-i32-SD-NEXT:    frintx s21, s21
+; CHECK-i32-SD-NEXT:    frintx s22, s22
+; CHECK-i32-SD-NEXT:    frintx s23, s23
 ; CHECK-i32-SD-NEXT:    fcvt s6, h6
+; CHECK-i32-SD-NEXT:    fcvtzs w8, s17
+; CHECK-i32-SD-NEXT:    fcvtzs w12, s18
+; CHECK-i32-SD-NEXT:    mov h17, v0.h[1]
+; CHECK-i32-SD-NEXT:    mov h18, v0.h[2]
+; CHECK-i32-SD-NEXT:    fcvt s16, h16
+; CHECK-i32-SD-NEXT:    fcvtzs w9, s24
+; CHECK-i32-SD-NEXT:    fcvtzs w10, s19
+; CHECK-i32-SD-NEXT:    fcvtzs w13, s20
+; CHECK-i32-SD-NEXT:    mov h19, v0.h[3]
+; CHECK-i32-SD-NEXT:    mov h20, v1.h[1]
+; CHECK-i32-SD-NEXT:    fcvtzs w15, s21
+; CHECK-i32-SD-NEXT:    fcvtzs w14, s22
+; CHECK-i32-SD-NEXT:    fcvt s21, h25
+; CHECK-i32-SD-NEXT:    fcvt s22, h26
 ; CHECK-i32-SD-NEXT:    fcvt s17, h17
-; CHECK-i32-SD-NEXT:    mov h20, v0.h[2]
-; CHECK-i32-SD-NEXT:    fcvt s7, h7
-; CHECK-i32-SD-NEXT:    fcvtzs w11, s19
-; CHECK-i32-SD-NEXT:    mov h19, v0.h[1]
-; CHECK-i32-SD-NEXT:    fcvt s5, h5
-; CHECK-i32-SD-NEXT:    fcvtzs w0, s16
-; CHECK-i32-SD-NEXT:    mov h21, v1.h[1]
-; CHECK-i32-SD-NEXT:    frintx s18, s18
-; CHECK-i32-SD-NEXT:    mov h4, v4.h[3]
-; CHECK-i32-SD-NEXT:    frintx s6, s6
-; CHECK-i32-SD-NEXT:    frintx s16, s17
-; CHECK-i32-SD-NEXT:    mov h17, v0.h[3]
-; CHECK-i32-SD-NEXT:    fcvt s0, h0
+; CHECK-i32-SD-NEXT:    fcvt s18, h18
+; CHECK-i32-SD-NEXT:    mov h24, v2.h[1]
+; CHECK-i32-SD-NEXT:    mov h25, v2.h[2]
+; CHECK-i32-SD-NEXT:    frintx s16, s16
+; CHECK-i32-SD-NEXT:    mov h26, v3.h[1]
+; CHECK-i32-SD-NEXT:    fcvtzs w11, s23
+; CHECK-i32-SD-NEXT:    mov h23, v1.h[2]
 ; CHECK-i32-SD-NEXT:    fcvt s19, h19
-; CHECK-i32-SD-NEXT:    frintx s5, s5
-; CHECK-i32-SD-NEXT:    fcvtzs w2, s18
-; CHECK-i32-SD-NEXT:    fcvt s18, h21
-; CHECK-i32-SD-NEXT:    fcvt s21, h2
-; CHECK-i32-SD-NEXT:    fcvtzs w18, s6
-; CHECK-i32-SD-NEXT:    frintx s6, s7
-; CHECK-i32-SD-NEXT:    fcvt s7, h20
-; CHECK-i32-SD-NEXT:    fcvtzs w16, s16
-; CHECK-i32-SD-NEXT:    fcvt s16, h17
-; CHECK-i32-SD-NEXT:    fcvt s17, h1
-; CHECK-i32-SD-NEXT:    frintx s0, s0
-; CHECK-i32-SD-NEXT:    fcvtzs w3, s5
-; CHECK-i32-SD-NEXT:    frintx s5, s19
-; CHECK-i32-SD-NEXT:    fcvt s19, h22
-; CHECK-i32-SD-NEXT:    mov h1, v1.h[3]
-; CHECK-i32-SD-NEXT:    fcvtzs w1, s6
-; CHECK-i32-SD-NEXT:    frintx s6, s7
-; CHECK-i32-SD-NEXT:    mov h7, v2.h[1]
+; CHECK-i32-SD-NEXT:    fcvt s20, h20
 ; CHECK-i32-SD-NEXT:    frintx s17, s17
-; CHECK-i32-SD-NEXT:    frintx s20, s16
-; CHECK-i32-SD-NEXT:    fmov s16, w12
-; CHECK-i32-SD-NEXT:    fcvtzs w4, s0
-; CHECK-i32-SD-NEXT:    frintx s0, s18
-; CHECK-i32-SD-NEXT:    fcvtzs w5, s5
-; CHECK-i32-SD-NEXT:    frintx s5, s19
-; CHECK-i32-SD-NEXT:    frintx s18, s21
-; CHECK-i32-SD-NEXT:    fcvt s19, h3
-; CHECK-i32-SD-NEXT:    fcvtzs w12, s6
-; CHECK-i32-SD-NEXT:    fcvt s6, h7
-; CHECK-i32-SD-NEXT:    mov h7, v3.h[1]
-; CHECK-i32-SD-NEXT:    fcvtzs w6, s17
-; CHECK-i32-SD-NEXT:    fmov s17, w13
-; CHECK-i32-SD-NEXT:    mov v16.s[1], w17
-; CHECK-i32-SD-NEXT:    fcvtzs w17, s20
-; CHECK-i32-SD-NEXT:    fcvtzs w7, s0
-; CHECK-i32-SD-NEXT:    mov h0, v2.h[2]
-; CHECK-i32-SD-NEXT:    mov h20, v3.h[2]
-; CHECK-i32-SD-NEXT:    fcvtzs w13, s5
-; CHECK-i32-SD-NEXT:    fmov s5, w15
-; CHECK-i32-SD-NEXT:    frintx s6, s6
-; CHECK-i32-SD-NEXT:    fcvt s7, h7
-; CHECK-i32-SD-NEXT:    mov v17.s[1], w14
-; CHECK-i32-SD-NEXT:    fcvtzs w14, s18
-; CHECK-i32-SD-NEXT:    frintx s18, s19
-; CHECK-i32-SD-NEXT:    mov h2, v2.h[3]
+; CHECK-i32-SD-NEXT:    frintx s21, s21
+; CHECK-i32-SD-NEXT:    frintx s22, s22
+; CHECK-i32-SD-NEXT:    frintx s18, s18
+; CHECK-i32-SD-NEXT:    fcvt s24, h24
+; CHECK-i32-SD-NEXT:    fcvt s25, h25
+; CHECK-i32-SD-NEXT:    fcvtzs w16, s16
+; CHECK-i32-SD-NEXT:    fcvt s16, h26
+; CHECK-i32-SD-NEXT:    fcvt s26, h27
+; CHECK-i32-SD-NEXT:    fcvt s23, h23
+; CHECK-i32-SD-NEXT:    frintx s19, s19
+; CHECK-i32-SD-NEXT:    frintx s20, s20
+; CHECK-i32-SD-NEXT:    fcvtzs w2, s17
+; CHECK-i32-SD-NEXT:    fcvtzs w1, s21
 ; CHECK-i32-SD-NEXT:    fcvt s0, h0
-; CHECK-i32-SD-NEXT:    mov h3, v3.h[3]
-; CHECK-i32-SD-NEXT:    mov v5.s[1], w0
-; CHECK-i32-SD-NEXT:    fcvt s19, h20
-; CHECK-i32-SD-NEXT:    fcvt s1, h1
-; CHECK-i32-SD-NEXT:    mov v16.s[2], w9
-; CHECK-i32-SD-NEXT:    fcvtzs w15, s6
-; CHECK-i32-SD-NEXT:    frintx s6, s7
-; CHECK-i32-SD-NEXT:    fmov s7, w2
 ; CHECK-i32-SD-NEXT:    fcvtzs w0, s18
-; CHECK-i32-SD-NEXT:    fcvt s20, h2
-; CHECK-i32-SD-NEXT:    fcvt s18, h4
-; CHECK-i32-SD-NEXT:    frintx s21, s0
-; CHECK-i32-SD-NEXT:    fcvt s3, h3
-; CHECK-i32-SD-NEXT:    fmov s0, w4
-; CHECK-i32-SD-NEXT:    frintx s19, s19
-; CHECK-i32-SD-NEXT:    fmov s2, w6
-; CHECK-i32-SD-NEXT:    fmov s4, w14
-; CHECK-i32-SD-NEXT:    fcvtzs w2, s6
-; CHECK-i32-SD-NEXT:    mov v7.s[1], w3
-; CHECK-i32-SD-NEXT:    frintx s1, s1
-; CHECK-i32-SD-NEXT:    fmov s6, w0
-; CHECK-i32-SD-NEXT:    mov v0.s[1], w5
+; CHECK-i32-SD-NEXT:    frintx s17, s24
+; CHECK-i32-SD-NEXT:    frintx s18, s25
+; CHECK-i32-SD-NEXT:    frintx s16, s16
+; CHECK-i32-SD-NEXT:    fcvtzs w18, s22
+; CHECK-i32-SD-NEXT:    frintx s6, s6
+; CHECK-i32-SD-NEXT:    frintx s21, s23
+; CHECK-i32-SD-NEXT:    fcvtzs w17, s19
+; CHECK-i32-SD-NEXT:    fcvtzs w3, s20
+; CHECK-i32-SD-NEXT:    frintx s19, s26
+; CHECK-i32-SD-NEXT:    fcvt s20, h7
+; CHECK-i32-SD-NEXT:    frintx s0, s0
+; CHECK-i32-SD-NEXT:    fcvtzs w5, s17
+; CHECK-i32-SD-NEXT:    fcvt s17, h1
+; CHECK-i32-SD-NEXT:    fcvtzs w6, s18
+; CHECK-i32-SD-NEXT:    fcvt s18, h2
+; CHECK-i32-SD-NEXT:    fcvtzs w7, s16
+; CHECK-i32-SD-NEXT:    fcvt s16, h3
+; CHECK-i32-SD-NEXT:    fcvtzs w4, s21
+; CHECK-i32-SD-NEXT:    mov h2, v2.h[3]
+; CHECK-i32-SD-NEXT:    mov h21, v7.h[3]
+; CHECK-i32-SD-NEXT:    fcvtzs w19, s19
+; CHECK-i32-SD-NEXT:    mov h19, v1.h[3]
 ; CHECK-i32-SD-NEXT:    frintx s20, s20
-; CHECK-i32-SD-NEXT:    mov v2.s[1], w7
-; CHECK-i32-SD-NEXT:    fcvtzs w3, s21
-; CHECK-i32-SD-NEXT:    mov v4.s[1], w15
-; CHECK-i32-SD-NEXT:    fcvtzs w14, s19
+; CHECK-i32-SD-NEXT:    frintx s17, s17
+; CHECK-i32-SD-NEXT:    mov h22, v3.h[3]
+; CHECK-i32-SD-NEXT:    fcvtzs s1, s4
 ; CHECK-i32-SD-NEXT:    frintx s18, s18
-; CHECK-i32-SD-NEXT:    frintx s3, s3
-; CHECK-i32-SD-NEXT:    mov v6.s[1], w2
-; CHECK-i32-SD-NEXT:    mov v17.s[2], w11
-; CHECK-i32-SD-NEXT:    fcvtzs w15, s1
-; CHECK-i32-SD-NEXT:    fcvtzs w0, s20
-; CHECK-i32-SD-NEXT:    mov v5.s[2], w18
-; CHECK-i32-SD-NEXT:    mov v0.s[2], w12
-; CHECK-i32-SD-NEXT:    mov v7.s[2], w1
-; CHECK-i32-SD-NEXT:    mov v2.s[2], w13
-; CHECK-i32-SD-NEXT:    mov v4.s[2], w3
-; CHECK-i32-SD-NEXT:    fcvtzs w9, s18
-; CHECK-i32-SD-NEXT:    fcvtzs w11, s3
-; CHECK-i32-SD-NEXT:    mov v16.s[3], w8
-; CHECK-i32-SD-NEXT:    mov v6.s[2], w14
-; CHECK-i32-SD-NEXT:    mov v17.s[3], w10
+; CHECK-i32-SD-NEXT:    frintx s16, s16
+; CHECK-i32-SD-NEXT:    fcvtzs s3, s5
+; CHECK-i32-SD-NEXT:    fcvt s23, h2
+; CHECK-i32-SD-NEXT:    fcvtzs s5, s6
+; CHECK-i32-SD-NEXT:    fcvtzs s0, s0
+; CHECK-i32-SD-NEXT:    fcvt s19, h19
+; CHECK-i32-SD-NEXT:    fcvtzs s7, s20
+; CHECK-i32-SD-NEXT:    fcvtzs s2, s17
+; CHECK-i32-SD-NEXT:    fcvt s17, h21
+; CHECK-i32-SD-NEXT:    mov v1.s[1], w11
+; CHECK-i32-SD-NEXT:    fcvtzs s4, s18
+; CHECK-i32-SD-NEXT:    fcvtzs s6, s16
+; CHECK-i32-SD-NEXT:    fcvt s16, h22
+; CHECK-i32-SD-NEXT:    mov v3.s[1], w12
+; CHECK-i32-SD-NEXT:    mov v5.s[1], w15
+; CHECK-i32-SD-NEXT:    mov v0.s[1], w2
+; CHECK-i32-SD-NEXT:    frintx s18, s19
+; CHECK-i32-SD-NEXT:    frintx s19, s23
+; CHECK-i32-SD-NEXT:    mov v7.s[1], w1
+; CHECK-i32-SD-NEXT:    mov v2.s[1], w3
+; CHECK-i32-SD-NEXT:    frintx s17, s17
+; CHECK-i32-SD-NEXT:    mov v1.s[2], w9
+; CHECK-i32-SD-NEXT:    mov v4.s[1], w5
+; CHECK-i32-SD-NEXT:    mov v6.s[1], w7
+; CHECK-i32-SD-NEXT:    frintx s16, s16
+; CHECK-i32-SD-NEXT:    mov v3.s[2], w10
+; CHECK-i32-SD-NEXT:    mov v5.s[2], w14
+; CHECK-i32-SD-NEXT:    mov v0.s[2], w0
+; CHECK-i32-SD-NEXT:    fcvtzs w11, s18
+; CHECK-i32-SD-NEXT:    fcvtzs w12, s19
+; CHECK-i32-SD-NEXT:    mov v7.s[2], w18
+; CHECK-i32-SD-NEXT:    mov v2.s[2], w4
+; CHECK-i32-SD-NEXT:    fcvtzs w9, s17
+; CHECK-i32-SD-NEXT:    mov v1.s[3], w8
+; CHECK-i32-SD-NEXT:    mov v4.s[2], w6
+; CHECK-i32-SD-NEXT:    mov v6.s[2], w19
+; CHECK-i32-SD-NEXT:    fcvtzs w10, s16
 ; CHECK-i32-SD-NEXT:    mov v0.s[3], w17
+; CHECK-i32-SD-NEXT:    mov v3.s[3], w13
 ; CHECK-i32-SD-NEXT:    mov v5.s[3], w16
-; CHECK-i32-SD-NEXT:    mov v2.s[3], w15
-; CHECK-i32-SD-NEXT:    mov v4.s[3], w0
+; CHECK-i32-SD-NEXT:    mov v2.s[3], w11
 ; CHECK-i32-SD-NEXT:    mov v7.s[3], w9
-; CHECK-i32-SD-NEXT:    mov v1.16b, v16.16b
-; CHECK-i32-SD-NEXT:    mov v6.s[3], w11
-; CHECK-i32-SD-NEXT:    mov v3.16b, v17.16b
+; CHECK-i32-SD-NEXT:    mov v4.s[3], w12
+; CHECK-i32-SD-NEXT:    mov v6.s[3], w10
+; CHECK-i32-SD-NEXT:    ldr x19, [sp], #16 // 8-byte Folded Reload
 ; CHECK-i32-SD-NEXT:    ret
 ;
 ; CHECK-i64-SD-LABEL: lrint_v32f16:
@@ -1326,18 +1302,11 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) nounwind {
 ; CHECK-i32-NEXT:    fmov s0, w8
 ; CHECK-i32-NEXT:    ret
 ;
-; CHECK-i64-SD-LABEL: lrint_v1f64:
-; CHECK-i64-SD:       // %bb.0:
-; CHECK-i64-SD-NEXT:    frintx d0, d0
-; CHECK-i64-SD-NEXT:    fcvtzs x8, d0
-; CHECK-i64-SD-NEXT:    fmov d0, x8
-; CHECK-i64-SD-NEXT:    ret
-;
-; CHECK-i64-GI-LABEL: lrint_v1f64:
-; CHECK-i64-GI:       // %bb.0:
-; CHECK-i64-GI-NEXT:    frintx d0, d0
-; CHECK-i64-GI-NEXT:    fcvtzs d0, d0
-; CHECK-i64-GI-NEXT:    ret
+; CHECK-i64-LABEL: lrint_v1f64:
+; CHECK-i64:       // %bb.0:
+; CHECK-i64-NEXT:    frintx d0, d0
+; CHECK-i64-NEXT:    fcvtzs d0, d0
+; CHECK-i64-NEXT:    ret
   %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x)
   ret <1 x iXLen> %a
 }
diff --git a/llvm/test/CodeGen/AArch64/zext.ll b/llvm/test/CodeGen/AArch64/zext.ll
index 7078d9b2586a8..292b7b28903ee 100644
--- a/llvm/test/CodeGen/AArch64/zext.ll
+++ b/llvm/test/CodeGen/AArch64/zext.ll
@@ -1127,41 +1127,41 @@ entry:
 define <16 x i64> @zext_v16i10_v16i64(<16 x i10> %a) {
 ; CHECK-SD-LABEL: zext_v16i10_v16i64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    fmov s0, w2
-; CHECK-SD-NEXT:    fmov s1, w0
+; CHECK-SD-NEXT:    fmov s0, w6
+; CHECK-SD-NEXT:    fmov s1, w4
 ; CHECK-SD-NEXT:    ldr s2, [sp]
-; CHECK-SD-NEXT:    fmov s3, w4
-; CHECK-SD-NEXT:    fmov s4, w6
-; CHECK-SD-NEXT:    add x9, sp, #8
+; CHECK-SD-NEXT:    fmov s3, w2
+; CHECK-SD-NEXT:    fmov s4, w0
 ; CHECK-SD-NEXT:    ldr s5, [sp, #16]
 ; CHECK-SD-NEXT:    ldr s6, [sp, #32]
 ; CHECK-SD-NEXT:    ldr s7, [sp, #48]
-; CHECK-SD-NEXT:    mov v1.s[1], w1
-; CHECK-SD-NEXT:    mov v0.s[1], w3
-; CHECK-SD-NEXT:    ld1 { v2.s }[1], [x9]
-; CHECK-SD-NEXT:    mov v3.s[1], w5
-; CHECK-SD-NEXT:    mov v4.s[1], w7
+; CHECK-SD-NEXT:    add x8, sp, #8
+; CHECK-SD-NEXT:    mov v1.s[1], w5
+; CHECK-SD-NEXT:    mov v0.s[1], w7
 ; CHECK-SD-NEXT:    add x9, sp, #24
+; CHECK-SD-NEXT:    mov v4.s[1], w1
+; CHECK-SD-NEXT:    mov v3.s[1], w3
 ; CHECK-SD-NEXT:    add x10, sp, #40
 ; CHECK-SD-NEXT:    add x11, sp, #56
+; CHECK-SD-NEXT:    ld1 { v2.s }[1], [x8]
 ; CHECK-SD-NEXT:    ld1 { v5.s }[1], [x9]
 ; CHECK-SD-NEXT:    ld1 { v6.s }[1], [x10]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[1], [x11]
 ; CHECK-SD-NEXT:    mov w8, #1023 // =0x3ff
-; CHECK-SD-NEXT:    ushll v1.2d, v1.2s, #0
 ; CHECK-SD-NEXT:    dup v16.2d, x8
-; CHECK-SD-NEXT:    ushll v17.2d, v0.2s, #0
-; CHECK-SD-NEXT:    ushll v3.2d, v3.2s, #0
+; CHECK-SD-NEXT:    ushll v17.2d, v1.2s, #0
+; CHECK-SD-NEXT:    ushll v18.2d, v0.2s, #0
 ; CHECK-SD-NEXT:    ushll v4.2d, v4.2s, #0
-; CHECK-SD-NEXT:    ushll v18.2d, v2.2s, #0
+; CHECK-SD-NEXT:    ushll v3.2d, v3.2s, #0
+; CHECK-SD-NEXT:    ushll v2.2d, v2.2s, #0
 ; CHECK-SD-NEXT:    ushll v5.2d, v5.2s, #0
 ; CHECK-SD-NEXT:    ushll v6.2d, v6.2s, #0
 ; CHECK-SD-NEXT:    ushll v7.2d, v7.2s, #0
-; CHECK-SD-NEXT:    and v0.16b, v1.16b, v16.16b
-; CHECK-SD-NEXT:    and v1.16b, v17.16b, v16.16b
-; CHECK-SD-NEXT:    and v2.16b, v3.16b, v16.16b
-; CHECK-SD-NEXT:    and v3.16b, v4.16b, v16.16b
-; CHECK-SD-NEXT:    and v4.16b, v18.16b, v16.16b
+; CHECK-SD-NEXT:    and v0.16b, v4.16b, v16.16b
+; CHECK-SD-NEXT:    and v1.16b, v3.16b, v16.16b
+; CHECK-SD-NEXT:    and v4.16b, v2.16b, v16.16b
+; CHECK-SD-NEXT:    and v2.16b, v17.16b, v16.16b
+; CHECK-SD-NEXT:    and v3.16b, v18.16b, v16.16b
 ; CHECK-SD-NEXT:    and v5.16b, v5.16b, v16.16b
 ; CHECK-SD-NEXT:    and v6.16b, v6.16b, v16.16b
 ; CHECK-SD-NEXT:    and v7.16b, v7.16b, v16.16b

>From a8aaad464f36e1c3a33bc2f45e594de31637cd99 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 21 Jan 2026 15:42:30 +0000
Subject: [PATCH 6/8] clean-up trigger conditions

---
 llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index b1377aeaaa69c..be79aab5357dd 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -7808,9 +7808,8 @@ void AArch64DAGToDAGISel::PreprocessISelDAG() {
     switch (N.getOpcode()) {
     case ISD::SCALAR_TO_VECTOR: {
       EVT VT = N.getValueType(0);
-      if (!VT.isVector() || VT.isScalableVector() || !VT.isInteger())
-        break;
-      if (VT.getVectorElementType() != N.getOperand(0).getValueType())
+      if (VT.isScalableVector() || !VT.isInteger() ||
+          VT.getVectorElementType() != N.getOperand(0).getValueType())
         break;
 
       Result = addBitcastHints(*CurDAG, N);

>From 1d19c22fcf7e9075c61c07b7d9511ab6e1c7a799 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Thu, 22 Jan 2026 11:51:25 +0000
Subject: [PATCH 7/8] Adjust check for adding bitcast

---
 llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index be79aab5357dd..c32a59e8af447 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -7807,12 +7807,10 @@ void AArch64DAGToDAGISel::PreprocessISelDAG() {
     SDValue Result;
     switch (N.getOpcode()) {
     case ISD::SCALAR_TO_VECTOR: {
-      EVT VT = N.getValueType(0);
-      if (VT.isScalableVector() || !VT.isInteger() ||
-          VT.getVectorElementType() != N.getOperand(0).getValueType())
-        break;
-
-      Result = addBitcastHints(*CurDAG, N);
+      EVT ScalarTy = N.getValueType(0).getVectorElementType();
+      if ((ScalarTy == MVT::i32 || ScalarTy == MVT::i64) && ScalarTy == N.getOperand(0).getValueType())
+        Result = addBitcastHints(*CurDAG, N);
+      
       break;
     }
     default:

>From 55a16ead7577205167b166989fbb9b2e92b2bdd2 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Thu, 22 Jan 2026 12:40:30 +0000
Subject: [PATCH 8/8] Fix interface

---
 llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index b7d807557f29a..fb0be3e812a96 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -537,10 +537,11 @@ INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
 /// to help instruction selector determine which operands are in Neon registers.
 static SDValue addBitcastHints(SelectionDAG &DAG, SDNode &N) {
   SDLoc DL(&N);
-  auto getFloatVT = [](EVT VT) {
+  auto getFloatVT = [&](EVT VT) {
     EVT ScalarVT = VT.getScalarType();
     assert((ScalarVT == MVT::i32 || ScalarVT == MVT::i64) && "Unexpected VT");
-    return VT.changeElementType(ScalarVT == MVT::i32 ? MVT::f32 : MVT::f64);
+    return VT.changeElementType(*(DAG.getContext()),
+                                ScalarVT == MVT::i32 ? MVT::f32 : MVT::f64);
   };
   auto bitcastToFloat = [&](SDValue Val) {
     return DAG.getBitcast(getFloatVT(Val.getValueType()), Val);
@@ -7828,7 +7829,7 @@ void AArch64DAGToDAGISel::PreprocessISelDAG() {
       EVT ScalarTy = N.getValueType(0).getVectorElementType();
       if ((ScalarTy == MVT::i32 || ScalarTy == MVT::i64) && ScalarTy == N.getOperand(0).getValueType())
         Result = addBitcastHints(*CurDAG, N);
-      
+
       break;
     }
     default:



More information about the llvm-commits mailing list