[llvm] [AArch64] Wrap integer SCALAR_TO_VECTOR nodes in bitcasts (PR #172837)
via llvm-commits
llvm-commits@lists.llvm.org
Thu Jan 22 04:41:55 PST 2026
https://github.com/Lukacma updated https://github.com/llvm/llvm-project/pull/172837
>From f453fd3056b9ffbdf377bc4b5607ef3569049b8b Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac@arm.com>
Date: Thu, 18 Dec 2025 11:16:41 +0000
Subject: [PATCH 1/8] [AArch64] Add scal_to_vec patterns for SIMD convert
intrinsics
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 17 +-
.../CodeGen/AArch64/arm64-cvt-simd-fptoi.ll | 170 ++
.../CodeGen/AArch64/arm64-cvt-simd-fptoi.s | 1515 +++++++++++++++++
.../AArch64/arm64-cvt-simd-intrinsics.ll | 334 +++-
...arm64-fixed-point-scalar-cvt-dagcombine.ll | 3 +-
llvm/test/CodeGen/AArch64/arm64-neon-copy.ll | 57 +-
llvm/test/CodeGen/AArch64/arm64-vcvt.ll | 28 +-
.../CodeGen/AArch64/fp-intrinsics-vector.ll | 6 +-
.../AArch64/sve-fixed-length-fp-to-int.ll | 6 +-
9 files changed, 2064 insertions(+), 72 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index c22929f379dfc..447fd9ef66343 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6563,12 +6563,19 @@ multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
(!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
def : Pat<(f64 (bitconvert (i64 (OpN (f32 FPR32:$Rn))))),
(!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
+
+ def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f16 FPR16:$Rn))))),
+ (!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
+ def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f32 FPR32:$Rn))))),
+ (!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
}
def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))),
(!cast<Instruction>(INST # v1i32) FPR32:$Rn)>;
def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))),
(!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
-
+
+ def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f64 FPR64:$Rn))))),
+ (!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
}
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtas, "FCVTAS">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtau, "FCVTAU">;
@@ -6611,12 +6618,20 @@ multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
(!cast<Instruction>(INST # DSr) $Rn)>;
def : Pat<(f32 (bitconvert (i32 (round f64:$Rn)))),
(!cast<Instruction>(INST # SDr) $Rn)>;
+
+ def : Pat<(v1i64 (scalar_to_vector (i64 (round f16:$Rn)))),
+ (!cast<Instruction>(INST # DHr) $Rn)>;
+ def : Pat<(v1i64 (scalar_to_vector (i64 (round f32:$Rn)))),
+ (!cast<Instruction>(INST # DSr) $Rn)>;
}
def : Pat<(f32 (bitconvert (i32 (round f32:$Rn)))),
(!cast<Instruction>(INST # v1i32) $Rn)>;
def : Pat<(f64 (bitconvert (i64 (round f64:$Rn)))),
(!cast<Instruction>(INST # v1i64) $Rn)>;
+ def : Pat<(v1i64 (scalar_to_vector (i64 (round f64:$Rn)))),
+ (!cast<Instruction>(INST # v1i64) $Rn)>;
+
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
index a729772f2897a..ebaca00d2cdb9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
@@ -15,6 +15,10 @@
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f64_simd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f64_simd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f32_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzs_scalar_to_vector_h_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzs_scalar_to_vector_s_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzu_scalar_to_vector_h_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzu_scalar_to_vector_s_strict
;
; FPTOI
@@ -1941,3 +1945,169 @@ define double @fcvtzu_dd_simd(double %a) {
%bc = bitcast i64 %i to double
ret double %bc
}
+
+;
+; FPTOI scalar_to_vector
+;
+
+define <1 x i64> @fcvtzs_scalar_to_vector_h(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_h:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs x8, h0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_scalar_to_vector_h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, h0
+; CHECK-NEXT: ret
+ %val = fptosi half %a to i64
+ %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+ ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzs_scalar_to_vector_s(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_s:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_scalar_to_vector_s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: ret
+ %val = fptosi float %a to i64
+ %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+ ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzs_scalar_to_vector_d(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_d:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_scalar_to_vector_d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: ret
+ %val = fptosi double %a to i64
+ %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+ ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzu_scalar_to_vector_h(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_h:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu x8, h0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_scalar_to_vector_h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, h0
+; CHECK-NEXT: ret
+ %val = fptoui half %a to i64
+ %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+ ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzu_scalar_to_vector_s(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_s:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_scalar_to_vector_s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, s0
+; CHECK-NEXT: ret
+ %val = fptoui float %a to i64
+ %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+ ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzu_scalar_to_vector_d(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_d:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu d0, d0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_scalar_to_vector_d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, d0
+; CHECK-NEXT: ret
+ %val = fptoui double %a to i64
+ %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+ ret <1 x i64> %vec
+}
+
+;
+; FPTOI scalar_to_vector strictfp
+;
+
+define <1 x i64> @fcvtzs_scalar_to_vector_h_strict(half %x) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_h_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs x8, h0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_scalar_to_vector_h_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, h0
+; CHECK-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict")
+ %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+ ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzs_scalar_to_vector_s_strict(float %x) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_s_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_scalar_to_vector_s_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict")
+ %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+ ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzu_scalar_to_vector_h_strict(half %x) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_h_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu x8, h0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_scalar_to_vector_h_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, h0
+; CHECK-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict")
+ %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+ ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzu_scalar_to_vector_s_strict(float %x) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_s_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_scalar_to_vector_s_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, s0
+; CHECK-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict")
+ %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+ ret <1 x i64> %vec
+}
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s
new file mode 100644
index 0000000000000..0850b306e8c79
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s
@@ -0,0 +1,1515 @@
+ .file "arm64-cvt-simd-fptoi.ll"
+ .text
+ .globl test_fptosi_f16_i32_simd // -- Begin function test_fptosi_f16_i32_simd
+ .p2align 2
+ .type test_fptosi_f16_i32_simd,@function
+test_fptosi_f16_i32_simd: // @test_fptosi_f16_i32_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs s0, h0
+ ret
+.Lfunc_end0:
+ .size test_fptosi_f16_i32_simd, .Lfunc_end0-test_fptosi_f16_i32_simd
+ .cfi_endproc
+ // -- End function
+ .globl test_fptosi_f16_i64_simd // -- Begin function test_fptosi_f16_i64_simd
+ .p2align 2
+ .type test_fptosi_f16_i64_simd,@function
+test_fptosi_f16_i64_simd: // @test_fptosi_f16_i64_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, h0
+ ret
+.Lfunc_end1:
+ .size test_fptosi_f16_i64_simd, .Lfunc_end1-test_fptosi_f16_i64_simd
+ .cfi_endproc
+ // -- End function
+ .globl test_fptosi_f64_i32_simd // -- Begin function test_fptosi_f64_i32_simd
+ .p2align 2
+ .type test_fptosi_f64_i32_simd,@function
+test_fptosi_f64_i32_simd: // @test_fptosi_f64_i32_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs s0, d0
+ ret
+.Lfunc_end2:
+ .size test_fptosi_f64_i32_simd, .Lfunc_end2-test_fptosi_f64_i32_simd
+ .cfi_endproc
+ // -- End function
+ .globl test_fptosi_f32_i64_simd // -- Begin function test_fptosi_f32_i64_simd
+ .p2align 2
+ .type test_fptosi_f32_i64_simd,@function
+test_fptosi_f32_i64_simd: // @test_fptosi_f32_i64_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, s0
+ ret
+.Lfunc_end3:
+ .size test_fptosi_f32_i64_simd, .Lfunc_end3-test_fptosi_f32_i64_simd
+ .cfi_endproc
+ // -- End function
+ .globl test_fptosi_f64_i64_simd // -- Begin function test_fptosi_f64_i64_simd
+ .p2align 2
+ .type test_fptosi_f64_i64_simd,@function
+test_fptosi_f64_i64_simd: // @test_fptosi_f64_i64_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, d0
+ ret
+.Lfunc_end4:
+ .size test_fptosi_f64_i64_simd, .Lfunc_end4-test_fptosi_f64_i64_simd
+ .cfi_endproc
+ // -- End function
+ .globl test_fptosi_f32_i32_simd // -- Begin function test_fptosi_f32_i32_simd
+ .p2align 2
+ .type test_fptosi_f32_i32_simd,@function
+test_fptosi_f32_i32_simd: // @test_fptosi_f32_i32_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs s0, s0
+ ret
+.Lfunc_end5:
+ .size test_fptosi_f32_i32_simd, .Lfunc_end5-test_fptosi_f32_i32_simd
+ .cfi_endproc
+ // -- End function
+ .globl test_fptoui_f16_i32_simd // -- Begin function test_fptoui_f16_i32_simd
+ .p2align 2
+ .type test_fptoui_f16_i32_simd,@function
+test_fptoui_f16_i32_simd: // @test_fptoui_f16_i32_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu s0, h0
+ ret
+.Lfunc_end6:
+ .size test_fptoui_f16_i32_simd, .Lfunc_end6-test_fptoui_f16_i32_simd
+ .cfi_endproc
+ // -- End function
+ .globl test_fptoui_f16_i64_simd // -- Begin function test_fptoui_f16_i64_simd
+ .p2align 2
+ .type test_fptoui_f16_i64_simd,@function
+test_fptoui_f16_i64_simd: // @test_fptoui_f16_i64_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu d0, h0
+ ret
+.Lfunc_end7:
+ .size test_fptoui_f16_i64_simd, .Lfunc_end7-test_fptoui_f16_i64_simd
+ .cfi_endproc
+ // -- End function
+ .globl test_fptoui_f64_i32_simd // -- Begin function test_fptoui_f64_i32_simd
+ .p2align 2
+ .type test_fptoui_f64_i32_simd,@function
+test_fptoui_f64_i32_simd: // @test_fptoui_f64_i32_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu s0, d0
+ ret
+.Lfunc_end8:
+ .size test_fptoui_f64_i32_simd, .Lfunc_end8-test_fptoui_f64_i32_simd
+ .cfi_endproc
+ // -- End function
+ .globl test_fptoui_f32_i64_simd // -- Begin function test_fptoui_f32_i64_simd
+ .p2align 2
+ .type test_fptoui_f32_i64_simd,@function
+test_fptoui_f32_i64_simd: // @test_fptoui_f32_i64_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu d0, s0
+ ret
+.Lfunc_end9:
+ .size test_fptoui_f32_i64_simd, .Lfunc_end9-test_fptoui_f32_i64_simd
+ .cfi_endproc
+ // -- End function
+ .globl test_fptoui_f64_i64_simd // -- Begin function test_fptoui_f64_i64_simd
+ .p2align 2
+ .type test_fptoui_f64_i64_simd,@function
+test_fptoui_f64_i64_simd: // @test_fptoui_f64_i64_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu d0, d0
+ ret
+.Lfunc_end10:
+ .size test_fptoui_f64_i64_simd, .Lfunc_end10-test_fptoui_f64_i64_simd
+ .cfi_endproc
+ // -- End function
+ .globl test_fptoui_f32_i32_simd // -- Begin function test_fptoui_f32_i32_simd
+ .p2align 2
+ .type test_fptoui_f32_i32_simd,@function
+test_fptoui_f32_i32_simd: // @test_fptoui_f32_i32_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu s0, s0
+ ret
+.Lfunc_end11:
+ .size test_fptoui_f32_i32_simd, .Lfunc_end11-test_fptoui_f32_i32_simd
+ .cfi_endproc
+ // -- End function
+ .globl fptosi_i32_f16_simd // -- Begin function fptosi_i32_f16_simd
+ .p2align 2
+ .type fptosi_i32_f16_simd,@function
+fptosi_i32_f16_simd: // @fptosi_i32_f16_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs s0, h0
+ ret
+.Lfunc_end12:
+ .size fptosi_i32_f16_simd, .Lfunc_end12-fptosi_i32_f16_simd
+ .cfi_endproc
+ // -- End function
+ .globl fptosi_i64_f16_simd // -- Begin function fptosi_i64_f16_simd
+ .p2align 2
+ .type fptosi_i64_f16_simd,@function
+fptosi_i64_f16_simd: // @fptosi_i64_f16_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, h0
+ ret
+.Lfunc_end13:
+ .size fptosi_i64_f16_simd, .Lfunc_end13-fptosi_i64_f16_simd
+ .cfi_endproc
+ // -- End function
+ .globl fptosi_i64_f32_simd // -- Begin function fptosi_i64_f32_simd
+ .p2align 2
+ .type fptosi_i64_f32_simd,@function
+fptosi_i64_f32_simd: // @fptosi_i64_f32_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, s0
+ ret
+.Lfunc_end14:
+ .size fptosi_i64_f32_simd, .Lfunc_end14-fptosi_i64_f32_simd
+ .cfi_endproc
+ // -- End function
+ .globl fptosi_i32_f64_simd // -- Begin function fptosi_i32_f64_simd
+ .p2align 2
+ .type fptosi_i32_f64_simd,@function
+fptosi_i32_f64_simd: // @fptosi_i32_f64_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs s0, d0
+ ret
+.Lfunc_end15:
+ .size fptosi_i32_f64_simd, .Lfunc_end15-fptosi_i32_f64_simd
+ .cfi_endproc
+ // -- End function
+ .globl fptosi_i64_f64_simd // -- Begin function fptosi_i64_f64_simd
+ .p2align 2
+ .type fptosi_i64_f64_simd,@function
+fptosi_i64_f64_simd: // @fptosi_i64_f64_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, d0
+ ret
+.Lfunc_end16:
+ .size fptosi_i64_f64_simd, .Lfunc_end16-fptosi_i64_f64_simd
+ .cfi_endproc
+ // -- End function
+ .globl fptosi_i32_f32_simd // -- Begin function fptosi_i32_f32_simd
+ .p2align 2
+ .type fptosi_i32_f32_simd,@function
+fptosi_i32_f32_simd: // @fptosi_i32_f32_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs s0, s0
+ ret
+.Lfunc_end17:
+ .size fptosi_i32_f32_simd, .Lfunc_end17-fptosi_i32_f32_simd
+ .cfi_endproc
+ // -- End function
+ .globl fptoui_i32_f16_simd // -- Begin function fptoui_i32_f16_simd
+ .p2align 2
+ .type fptoui_i32_f16_simd,@function
+fptoui_i32_f16_simd: // @fptoui_i32_f16_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu s0, h0
+ ret
+.Lfunc_end18:
+ .size fptoui_i32_f16_simd, .Lfunc_end18-fptoui_i32_f16_simd
+ .cfi_endproc
+ // -- End function
+ .globl fptoui_i64_f16_simd // -- Begin function fptoui_i64_f16_simd
+ .p2align 2
+ .type fptoui_i64_f16_simd,@function
+fptoui_i64_f16_simd: // @fptoui_i64_f16_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu d0, h0
+ ret
+.Lfunc_end19:
+ .size fptoui_i64_f16_simd, .Lfunc_end19-fptoui_i64_f16_simd
+ .cfi_endproc
+ // -- End function
+ .globl fptoui_i64_f32_simd // -- Begin function fptoui_i64_f32_simd
+ .p2align 2
+ .type fptoui_i64_f32_simd,@function
+fptoui_i64_f32_simd: // @fptoui_i64_f32_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu d0, s0
+ ret
+.Lfunc_end20:
+ .size fptoui_i64_f32_simd, .Lfunc_end20-fptoui_i64_f32_simd
+ .cfi_endproc
+ // -- End function
+ .globl fptoui_i32_f64_simd // -- Begin function fptoui_i32_f64_simd
+ .p2align 2
+ .type fptoui_i32_f64_simd,@function
+fptoui_i32_f64_simd: // @fptoui_i32_f64_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu s0, d0
+ ret
+.Lfunc_end21:
+ .size fptoui_i32_f64_simd, .Lfunc_end21-fptoui_i32_f64_simd
+ .cfi_endproc
+ // -- End function
+ .globl fptoui_i64_f64_simd // -- Begin function fptoui_i64_f64_simd
+ .p2align 2
+ .type fptoui_i64_f64_simd,@function
+fptoui_i64_f64_simd: // @fptoui_i64_f64_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu d0, d0
+ ret
+.Lfunc_end22:
+ .size fptoui_i64_f64_simd, .Lfunc_end22-fptoui_i64_f64_simd
+ .cfi_endproc
+ // -- End function
+ .globl fptoui_i32_f32_simd // -- Begin function fptoui_i32_f32_simd
+ .p2align 2
+ .type fptoui_i32_f32_simd,@function
+fptoui_i32_f32_simd: // @fptoui_i32_f32_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu s0, s0
+ ret
+.Lfunc_end23:
+ .size fptoui_i32_f32_simd, .Lfunc_end23-fptoui_i32_f32_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtas_ds_round_simd // -- Begin function fcvtas_ds_round_simd
+ .p2align 2
+ .type fcvtas_ds_round_simd,@function
+fcvtas_ds_round_simd: // @fcvtas_ds_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtas d0, s0
+ ret
+.Lfunc_end24:
+ .size fcvtas_ds_round_simd, .Lfunc_end24-fcvtas_ds_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtas_sd_round_simd // -- Begin function fcvtas_sd_round_simd
+ .p2align 2
+ .type fcvtas_sd_round_simd,@function
+fcvtas_sd_round_simd: // @fcvtas_sd_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtas s0, d0
+ ret
+.Lfunc_end25:
+ .size fcvtas_sd_round_simd, .Lfunc_end25-fcvtas_sd_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtas_ss_round_simd // -- Begin function fcvtas_ss_round_simd
+ .p2align 2
+ .type fcvtas_ss_round_simd,@function
+fcvtas_ss_round_simd: // @fcvtas_ss_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtas s0, s0
+ ret
+.Lfunc_end26:
+ .size fcvtas_ss_round_simd, .Lfunc_end26-fcvtas_ss_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtas_dd_round_simd // -- Begin function fcvtas_dd_round_simd
+ .p2align 2
+ .type fcvtas_dd_round_simd,@function
+fcvtas_dd_round_simd: // @fcvtas_dd_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtas d0, d0
+ ret
+.Lfunc_end27:
+ .size fcvtas_dd_round_simd, .Lfunc_end27-fcvtas_dd_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtau_ds_round_simd // -- Begin function fcvtau_ds_round_simd
+ .p2align 2
+ .type fcvtau_ds_round_simd,@function
+fcvtau_ds_round_simd: // @fcvtau_ds_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtau d0, s0
+ ret
+.Lfunc_end28:
+ .size fcvtau_ds_round_simd, .Lfunc_end28-fcvtau_ds_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtau_sd_round_simd // -- Begin function fcvtau_sd_round_simd
+ .p2align 2
+ .type fcvtau_sd_round_simd,@function
+fcvtau_sd_round_simd: // @fcvtau_sd_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtau s0, d0
+ ret
+.Lfunc_end29:
+ .size fcvtau_sd_round_simd, .Lfunc_end29-fcvtau_sd_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtau_ss_round_simd // -- Begin function fcvtau_ss_round_simd
+ .p2align 2
+ .type fcvtau_ss_round_simd,@function
+fcvtau_ss_round_simd: // @fcvtau_ss_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtas s0, s0
+ ret
+.Lfunc_end30:
+ .size fcvtau_ss_round_simd, .Lfunc_end30-fcvtau_ss_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtau_dd_round_simd // -- Begin function fcvtau_dd_round_simd
+ .p2align 2
+ .type fcvtau_dd_round_simd,@function
+fcvtau_dd_round_simd: // @fcvtau_dd_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtas d0, d0
+ ret
+.Lfunc_end31:
+ .size fcvtau_dd_round_simd, .Lfunc_end31-fcvtau_dd_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtms_ds_round_simd // -- Begin function fcvtms_ds_round_simd
+ .p2align 2
+ .type fcvtms_ds_round_simd,@function
+fcvtms_ds_round_simd: // @fcvtms_ds_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtms d0, s0
+ ret
+.Lfunc_end32:
+ .size fcvtms_ds_round_simd, .Lfunc_end32-fcvtms_ds_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtms_sd_round_simd // -- Begin function fcvtms_sd_round_simd
+ .p2align 2
+ .type fcvtms_sd_round_simd,@function
+fcvtms_sd_round_simd: // @fcvtms_sd_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtms s0, d0
+ ret
+.Lfunc_end33:
+ .size fcvtms_sd_round_simd, .Lfunc_end33-fcvtms_sd_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtms_ss_round_simd // -- Begin function fcvtms_ss_round_simd
+ .p2align 2
+ .type fcvtms_ss_round_simd,@function
+fcvtms_ss_round_simd: // @fcvtms_ss_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtms s0, s0
+ ret
+.Lfunc_end34:
+ .size fcvtms_ss_round_simd, .Lfunc_end34-fcvtms_ss_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtms_dd_round_simd // -- Begin function fcvtms_dd_round_simd
+ .p2align 2
+ .type fcvtms_dd_round_simd,@function
+fcvtms_dd_round_simd: // @fcvtms_dd_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtms d0, d0
+ ret
+.Lfunc_end35:
+ .size fcvtms_dd_round_simd, .Lfunc_end35-fcvtms_dd_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtmu_ds_round_simd // -- Begin function fcvtmu_ds_round_simd
+ .p2align 2
+ .type fcvtmu_ds_round_simd,@function
+fcvtmu_ds_round_simd: // @fcvtmu_ds_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtmu d0, s0
+ ret
+.Lfunc_end36:
+ .size fcvtmu_ds_round_simd, .Lfunc_end36-fcvtmu_ds_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtmu_sd_round_simd // -- Begin function fcvtmu_sd_round_simd
+ .p2align 2
+ .type fcvtmu_sd_round_simd,@function
+fcvtmu_sd_round_simd: // @fcvtmu_sd_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtmu s0, d0
+ ret
+.Lfunc_end37:
+ .size fcvtmu_sd_round_simd, .Lfunc_end37-fcvtmu_sd_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtmu_ss_round_simd // -- Begin function fcvtmu_ss_round_simd
+ .p2align 2
+ .type fcvtmu_ss_round_simd,@function
+fcvtmu_ss_round_simd: // @fcvtmu_ss_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtms s0, s0
+ ret
+.Lfunc_end38:
+ .size fcvtmu_ss_round_simd, .Lfunc_end38-fcvtmu_ss_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtmu_dd_round_simd // -- Begin function fcvtmu_dd_round_simd
+ .p2align 2
+ .type fcvtmu_dd_round_simd,@function
+fcvtmu_dd_round_simd: // @fcvtmu_dd_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtms d0, d0
+ ret
+.Lfunc_end39:
+ .size fcvtmu_dd_round_simd, .Lfunc_end39-fcvtmu_dd_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtps_ds_round_simd // -- Begin function fcvtps_ds_round_simd
+ .p2align 2
+ .type fcvtps_ds_round_simd,@function
+fcvtps_ds_round_simd: // @fcvtps_ds_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtps d0, s0
+ ret
+.Lfunc_end40:
+ .size fcvtps_ds_round_simd, .Lfunc_end40-fcvtps_ds_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtps_sd_round_simd // -- Begin function fcvtps_sd_round_simd
+ .p2align 2
+ .type fcvtps_sd_round_simd,@function
+fcvtps_sd_round_simd: // @fcvtps_sd_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtps s0, d0
+ ret
+.Lfunc_end41:
+ .size fcvtps_sd_round_simd, .Lfunc_end41-fcvtps_sd_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtps_ss_round_simd // -- Begin function fcvtps_ss_round_simd
+ .p2align 2
+ .type fcvtps_ss_round_simd,@function
+fcvtps_ss_round_simd: // @fcvtps_ss_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtps s0, s0
+ ret
+.Lfunc_end42:
+ .size fcvtps_ss_round_simd, .Lfunc_end42-fcvtps_ss_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtps_dd_round_simd // -- Begin function fcvtps_dd_round_simd
+ .p2align 2
+ .type fcvtps_dd_round_simd,@function
+fcvtps_dd_round_simd: // @fcvtps_dd_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtps d0, d0
+ ret
+.Lfunc_end43:
+ .size fcvtps_dd_round_simd, .Lfunc_end43-fcvtps_dd_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtpu_ds_round_simd // -- Begin function fcvtpu_ds_round_simd
+ .p2align 2
+ .type fcvtpu_ds_round_simd,@function
+fcvtpu_ds_round_simd: // @fcvtpu_ds_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtpu d0, s0
+ ret
+.Lfunc_end44:
+ .size fcvtpu_ds_round_simd, .Lfunc_end44-fcvtpu_ds_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtpu_sd_round_simd // -- Begin function fcvtpu_sd_round_simd
+ .p2align 2
+ .type fcvtpu_sd_round_simd,@function
+fcvtpu_sd_round_simd: // @fcvtpu_sd_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtpu s0, d0
+ ret
+.Lfunc_end45:
+ .size fcvtpu_sd_round_simd, .Lfunc_end45-fcvtpu_sd_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtpu_ss_round_simd // -- Begin function fcvtpu_ss_round_simd
+ .p2align 2
+ .type fcvtpu_ss_round_simd,@function
+fcvtpu_ss_round_simd: // @fcvtpu_ss_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtps s0, s0
+ ret
+.Lfunc_end46:
+ .size fcvtpu_ss_round_simd, .Lfunc_end46-fcvtpu_ss_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtpu_dd_round_simd // -- Begin function fcvtpu_dd_round_simd
+ .p2align 2
+ .type fcvtpu_dd_round_simd,@function
+fcvtpu_dd_round_simd: // @fcvtpu_dd_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtps d0, d0
+ ret
+.Lfunc_end47:
+ .size fcvtpu_dd_round_simd, .Lfunc_end47-fcvtpu_dd_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_ds_round_simd // -- Begin function fcvtzs_ds_round_simd
+ .p2align 2
+ .type fcvtzs_ds_round_simd,@function
+fcvtzs_ds_round_simd: // @fcvtzs_ds_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, s0
+ ret
+.Lfunc_end48:
+ .size fcvtzs_ds_round_simd, .Lfunc_end48-fcvtzs_ds_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_sd_round_simd // -- Begin function fcvtzs_sd_round_simd
+ .p2align 2
+ .type fcvtzs_sd_round_simd,@function
+fcvtzs_sd_round_simd: // @fcvtzs_sd_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs s0, d0
+ ret
+.Lfunc_end49:
+ .size fcvtzs_sd_round_simd, .Lfunc_end49-fcvtzs_sd_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_ss_round_simd // -- Begin function fcvtzs_ss_round_simd
+ .p2align 2
+ .type fcvtzs_ss_round_simd,@function
+fcvtzs_ss_round_simd: // @fcvtzs_ss_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs s0, s0
+ ret
+.Lfunc_end50:
+ .size fcvtzs_ss_round_simd, .Lfunc_end50-fcvtzs_ss_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_dd_round_simd // -- Begin function fcvtzs_dd_round_simd
+ .p2align 2
+ .type fcvtzs_dd_round_simd,@function
+fcvtzs_dd_round_simd: // @fcvtzs_dd_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, d0
+ ret
+.Lfunc_end51:
+ .size fcvtzs_dd_round_simd, .Lfunc_end51-fcvtzs_dd_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_ds_round_simd // -- Begin function fcvtzu_ds_round_simd
+ .p2align 2
+ .type fcvtzu_ds_round_simd,@function
+fcvtzu_ds_round_simd: // @fcvtzu_ds_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu d0, s0
+ ret
+.Lfunc_end52:
+ .size fcvtzu_ds_round_simd, .Lfunc_end52-fcvtzu_ds_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_sd_round_simd // -- Begin function fcvtzu_sd_round_simd
+ .p2align 2
+ .type fcvtzu_sd_round_simd,@function
+fcvtzu_sd_round_simd: // @fcvtzu_sd_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu s0, d0
+ ret
+.Lfunc_end53:
+ .size fcvtzu_sd_round_simd, .Lfunc_end53-fcvtzu_sd_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_ss_round_simd // -- Begin function fcvtzu_ss_round_simd
+ .p2align 2
+ .type fcvtzu_ss_round_simd,@function
+fcvtzu_ss_round_simd: // @fcvtzu_ss_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs s0, s0
+ ret
+.Lfunc_end54:
+ .size fcvtzu_ss_round_simd, .Lfunc_end54-fcvtzu_ss_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_dd_round_simd // -- Begin function fcvtzu_dd_round_simd
+ .p2align 2
+ .type fcvtzu_dd_round_simd,@function
+fcvtzu_dd_round_simd: // @fcvtzu_dd_round_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, d0
+ ret
+.Lfunc_end55:
+ .size fcvtzu_dd_round_simd, .Lfunc_end55-fcvtzu_dd_round_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_sh_sat_simd // -- Begin function fcvtzs_sh_sat_simd
+ .p2align 2
+ .type fcvtzs_sh_sat_simd,@function
+fcvtzs_sh_sat_simd: // @fcvtzs_sh_sat_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs s0, h0
+ ret
+.Lfunc_end56:
+ .size fcvtzs_sh_sat_simd, .Lfunc_end56-fcvtzs_sh_sat_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_dh_sat_simd // -- Begin function fcvtzs_dh_sat_simd
+ .p2align 2
+ .type fcvtzs_dh_sat_simd,@function
+fcvtzs_dh_sat_simd: // @fcvtzs_dh_sat_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, h0
+ ret
+.Lfunc_end57:
+ .size fcvtzs_dh_sat_simd, .Lfunc_end57-fcvtzs_dh_sat_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_ds_sat_simd // -- Begin function fcvtzs_ds_sat_simd
+ .p2align 2
+ .type fcvtzs_ds_sat_simd,@function
+fcvtzs_ds_sat_simd: // @fcvtzs_ds_sat_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, s0
+ ret
+.Lfunc_end58:
+ .size fcvtzs_ds_sat_simd, .Lfunc_end58-fcvtzs_ds_sat_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_sd_sat_simd // -- Begin function fcvtzs_sd_sat_simd
+ .p2align 2
+ .type fcvtzs_sd_sat_simd,@function
+fcvtzs_sd_sat_simd: // @fcvtzs_sd_sat_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs s0, d0
+ ret
+.Lfunc_end59:
+ .size fcvtzs_sd_sat_simd, .Lfunc_end59-fcvtzs_sd_sat_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_ss_sat_simd // -- Begin function fcvtzs_ss_sat_simd
+ .p2align 2
+ .type fcvtzs_ss_sat_simd,@function
+fcvtzs_ss_sat_simd: // @fcvtzs_ss_sat_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs s0, s0
+ ret
+.Lfunc_end60:
+ .size fcvtzs_ss_sat_simd, .Lfunc_end60-fcvtzs_ss_sat_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_dd_sat_simd // -- Begin function fcvtzs_dd_sat_simd
+ .p2align 2
+ .type fcvtzs_dd_sat_simd,@function
+fcvtzs_dd_sat_simd: // @fcvtzs_dd_sat_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, d0
+ ret
+.Lfunc_end61:
+ .size fcvtzs_dd_sat_simd, .Lfunc_end61-fcvtzs_dd_sat_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_sh_sat_simd // -- Begin function fcvtzu_sh_sat_simd
+ .p2align 2
+ .type fcvtzu_sh_sat_simd,@function
+fcvtzu_sh_sat_simd: // @fcvtzu_sh_sat_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu s0, h0
+ ret
+.Lfunc_end62:
+ .size fcvtzu_sh_sat_simd, .Lfunc_end62-fcvtzu_sh_sat_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_dh_sat_simd // -- Begin function fcvtzu_dh_sat_simd
+ .p2align 2
+ .type fcvtzu_dh_sat_simd,@function
+fcvtzu_dh_sat_simd: // @fcvtzu_dh_sat_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu d0, h0
+ ret
+.Lfunc_end63:
+ .size fcvtzu_dh_sat_simd, .Lfunc_end63-fcvtzu_dh_sat_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_ds_sat_simd // -- Begin function fcvtzu_ds_sat_simd
+ .p2align 2
+ .type fcvtzu_ds_sat_simd,@function
+fcvtzu_ds_sat_simd: // @fcvtzu_ds_sat_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu d0, s0
+ ret
+.Lfunc_end64:
+ .size fcvtzu_ds_sat_simd, .Lfunc_end64-fcvtzu_ds_sat_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_sd_sat_simd // -- Begin function fcvtzu_sd_sat_simd
+ .p2align 2
+ .type fcvtzu_sd_sat_simd,@function
+fcvtzu_sd_sat_simd: // @fcvtzu_sd_sat_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu s0, d0
+ ret
+.Lfunc_end65:
+ .size fcvtzu_sd_sat_simd, .Lfunc_end65-fcvtzu_sd_sat_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_ss_sat_simd // -- Begin function fcvtzu_ss_sat_simd
+ .p2align 2
+	.type	fcvtzu_ss_sat_simd,@function
+fcvtzu_ss_sat_simd: // @fcvtzu_ss_sat_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs s0, s0
+ ret
+.Lfunc_end66:
+ .size fcvtzu_ss_sat_simd, .Lfunc_end66-fcvtzu_ss_sat_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_dd_sat_simd // -- Begin function fcvtzu_dd_sat_simd
+ .p2align 2
+	.type	fcvtzu_dd_sat_simd,@function
+fcvtzu_dd_sat_simd: // @fcvtzu_dd_sat_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, d0
+ ret
+.Lfunc_end67:
+ .size fcvtzu_dd_sat_simd, .Lfunc_end67-fcvtzu_dd_sat_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtas_sh_simd // -- Begin function fcvtas_sh_simd
+ .p2align 2
+	.type	fcvtas_sh_simd,@function
+fcvtas_sh_simd: // @fcvtas_sh_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtas s0, h0
+ ret
+.Lfunc_end68:
+ .size fcvtas_sh_simd, .Lfunc_end68-fcvtas_sh_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtas_dh_simd // -- Begin function fcvtas_dh_simd
+ .p2align 2
+	.type	fcvtas_dh_simd,@function
+fcvtas_dh_simd: // @fcvtas_dh_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtas d0, h0
+ ret
+.Lfunc_end69:
+ .size fcvtas_dh_simd, .Lfunc_end69-fcvtas_dh_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtas_ds_simd // -- Begin function fcvtas_ds_simd
+ .p2align 2
+	.type	fcvtas_ds_simd,@function
+fcvtas_ds_simd: // @fcvtas_ds_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtas d0, s0
+ ret
+.Lfunc_end70:
+ .size fcvtas_ds_simd, .Lfunc_end70-fcvtas_ds_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtas_sd_simd // -- Begin function fcvtas_sd_simd
+ .p2align 2
+	.type	fcvtas_sd_simd,@function
+fcvtas_sd_simd: // @fcvtas_sd_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtas s0, d0
+ ret
+.Lfunc_end71:
+ .size fcvtas_sd_simd, .Lfunc_end71-fcvtas_sd_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtas_ss_simd // -- Begin function fcvtas_ss_simd
+ .p2align 2
+	.type	fcvtas_ss_simd,@function
+fcvtas_ss_simd: // @fcvtas_ss_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtas s0, s0
+ ret
+.Lfunc_end72:
+ .size fcvtas_ss_simd, .Lfunc_end72-fcvtas_ss_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtas_dd_simd // -- Begin function fcvtas_dd_simd
+ .p2align 2
+	.type	fcvtas_dd_simd,@function
+fcvtas_dd_simd: // @fcvtas_dd_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtas d0, d0
+ ret
+.Lfunc_end73:
+ .size fcvtas_dd_simd, .Lfunc_end73-fcvtas_dd_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtau_sh_simd // -- Begin function fcvtau_sh_simd
+ .p2align 2
+	.type	fcvtau_sh_simd,@function
+fcvtau_sh_simd: // @fcvtau_sh_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtau s0, h0
+ ret
+.Lfunc_end74:
+ .size fcvtau_sh_simd, .Lfunc_end74-fcvtau_sh_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtau_dh_simd // -- Begin function fcvtau_dh_simd
+ .p2align 2
+	.type	fcvtau_dh_simd,@function
+fcvtau_dh_simd: // @fcvtau_dh_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtau d0, h0
+ ret
+.Lfunc_end75:
+ .size fcvtau_dh_simd, .Lfunc_end75-fcvtau_dh_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtau_ds_simd // -- Begin function fcvtau_ds_simd
+ .p2align 2
+	.type	fcvtau_ds_simd,@function
+fcvtau_ds_simd: // @fcvtau_ds_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtau d0, s0
+ ret
+.Lfunc_end76:
+ .size fcvtau_ds_simd, .Lfunc_end76-fcvtau_ds_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtau_sd_simd // -- Begin function fcvtau_sd_simd
+ .p2align 2
+	.type	fcvtau_sd_simd,@function
+fcvtau_sd_simd: // @fcvtau_sd_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtau s0, d0
+ ret
+.Lfunc_end77:
+ .size fcvtau_sd_simd, .Lfunc_end77-fcvtau_sd_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtau_ss_simd // -- Begin function fcvtau_ss_simd
+ .p2align 2
+	.type	fcvtau_ss_simd,@function
+fcvtau_ss_simd: // @fcvtau_ss_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtas s0, s0
+ ret
+.Lfunc_end78:
+ .size fcvtau_ss_simd, .Lfunc_end78-fcvtau_ss_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtau_dd_simd // -- Begin function fcvtau_dd_simd
+ .p2align 2
+	.type	fcvtau_dd_simd,@function
+fcvtau_dd_simd: // @fcvtau_dd_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtas d0, d0
+ ret
+.Lfunc_end79:
+ .size fcvtau_dd_simd, .Lfunc_end79-fcvtau_dd_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtms_sh_simd // -- Begin function fcvtms_sh_simd
+ .p2align 2
+	.type	fcvtms_sh_simd,@function
+fcvtms_sh_simd: // @fcvtms_sh_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtms s0, h0
+ ret
+.Lfunc_end80:
+ .size fcvtms_sh_simd, .Lfunc_end80-fcvtms_sh_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtms_dh_simd // -- Begin function fcvtms_dh_simd
+ .p2align 2
+	.type	fcvtms_dh_simd,@function
+fcvtms_dh_simd: // @fcvtms_dh_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtms d0, h0
+ ret
+.Lfunc_end81:
+ .size fcvtms_dh_simd, .Lfunc_end81-fcvtms_dh_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtms_ds_simd // -- Begin function fcvtms_ds_simd
+ .p2align 2
+	.type	fcvtms_ds_simd,@function
+fcvtms_ds_simd: // @fcvtms_ds_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtms d0, s0
+ ret
+.Lfunc_end82:
+ .size fcvtms_ds_simd, .Lfunc_end82-fcvtms_ds_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtms_sd_simd // -- Begin function fcvtms_sd_simd
+ .p2align 2
+	.type	fcvtms_sd_simd,@function
+fcvtms_sd_simd: // @fcvtms_sd_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtms s0, d0
+ ret
+.Lfunc_end83:
+ .size fcvtms_sd_simd, .Lfunc_end83-fcvtms_sd_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtms_ss_simd // -- Begin function fcvtms_ss_simd
+ .p2align 2
+	.type	fcvtms_ss_simd,@function
+fcvtms_ss_simd: // @fcvtms_ss_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtms s0, s0
+ ret
+.Lfunc_end84:
+ .size fcvtms_ss_simd, .Lfunc_end84-fcvtms_ss_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtms_dd_simd // -- Begin function fcvtms_dd_simd
+ .p2align 2
+	.type	fcvtms_dd_simd,@function
+fcvtms_dd_simd: // @fcvtms_dd_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtms d0, d0
+ ret
+.Lfunc_end85:
+ .size fcvtms_dd_simd, .Lfunc_end85-fcvtms_dd_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtmu_sh_simd // -- Begin function fcvtmu_sh_simd
+ .p2align 2
+	.type	fcvtmu_sh_simd,@function
+fcvtmu_sh_simd: // @fcvtmu_sh_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtmu s0, h0
+ ret
+.Lfunc_end86:
+ .size fcvtmu_sh_simd, .Lfunc_end86-fcvtmu_sh_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtmu_dh_simd // -- Begin function fcvtmu_dh_simd
+ .p2align 2
+	.type	fcvtmu_dh_simd,@function
+fcvtmu_dh_simd: // @fcvtmu_dh_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtmu d0, h0
+ ret
+.Lfunc_end87:
+ .size fcvtmu_dh_simd, .Lfunc_end87-fcvtmu_dh_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtmu_ds_simd // -- Begin function fcvtmu_ds_simd
+ .p2align 2
+	.type	fcvtmu_ds_simd,@function
+fcvtmu_ds_simd: // @fcvtmu_ds_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtmu d0, s0
+ ret
+.Lfunc_end88:
+ .size fcvtmu_ds_simd, .Lfunc_end88-fcvtmu_ds_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtmu_sd_simd // -- Begin function fcvtmu_sd_simd
+ .p2align 2
+	.type	fcvtmu_sd_simd,@function
+fcvtmu_sd_simd: // @fcvtmu_sd_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtmu s0, d0
+ ret
+.Lfunc_end89:
+ .size fcvtmu_sd_simd, .Lfunc_end89-fcvtmu_sd_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtmu_ss_simd // -- Begin function fcvtmu_ss_simd
+ .p2align 2
+	.type	fcvtmu_ss_simd,@function
+fcvtmu_ss_simd: // @fcvtmu_ss_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtms s0, s0
+ ret
+.Lfunc_end90:
+ .size fcvtmu_ss_simd, .Lfunc_end90-fcvtmu_ss_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtmu_dd_simd // -- Begin function fcvtmu_dd_simd
+ .p2align 2
+	.type	fcvtmu_dd_simd,@function
+fcvtmu_dd_simd: // @fcvtmu_dd_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtms d0, d0
+ ret
+.Lfunc_end91:
+ .size fcvtmu_dd_simd, .Lfunc_end91-fcvtmu_dd_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtps_sh_simd // -- Begin function fcvtps_sh_simd
+ .p2align 2
+	.type	fcvtps_sh_simd,@function
+fcvtps_sh_simd: // @fcvtps_sh_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtps s0, h0
+ ret
+.Lfunc_end92:
+ .size fcvtps_sh_simd, .Lfunc_end92-fcvtps_sh_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtps_dh_simd // -- Begin function fcvtps_dh_simd
+ .p2align 2
+	.type	fcvtps_dh_simd,@function
+fcvtps_dh_simd: // @fcvtps_dh_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtps d0, h0
+ ret
+.Lfunc_end93:
+ .size fcvtps_dh_simd, .Lfunc_end93-fcvtps_dh_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtps_ds_simd // -- Begin function fcvtps_ds_simd
+ .p2align 2
+	.type	fcvtps_ds_simd,@function
+fcvtps_ds_simd: // @fcvtps_ds_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtps d0, s0
+ ret
+.Lfunc_end94:
+ .size fcvtps_ds_simd, .Lfunc_end94-fcvtps_ds_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtps_sd_simd // -- Begin function fcvtps_sd_simd
+ .p2align 2
+	.type	fcvtps_sd_simd,@function
+fcvtps_sd_simd: // @fcvtps_sd_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtps s0, d0
+ ret
+.Lfunc_end95:
+ .size fcvtps_sd_simd, .Lfunc_end95-fcvtps_sd_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtps_ss_simd // -- Begin function fcvtps_ss_simd
+ .p2align 2
+	.type	fcvtps_ss_simd,@function
+fcvtps_ss_simd: // @fcvtps_ss_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtps s0, s0
+ ret
+.Lfunc_end96:
+ .size fcvtps_ss_simd, .Lfunc_end96-fcvtps_ss_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtps_dd_simd // -- Begin function fcvtps_dd_simd
+ .p2align 2
+	.type	fcvtps_dd_simd,@function
+fcvtps_dd_simd: // @fcvtps_dd_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtps d0, d0
+ ret
+.Lfunc_end97:
+ .size fcvtps_dd_simd, .Lfunc_end97-fcvtps_dd_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtpu_sh_simd // -- Begin function fcvtpu_sh_simd
+ .p2align 2
+	.type	fcvtpu_sh_simd,@function
+fcvtpu_sh_simd: // @fcvtpu_sh_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtpu s0, h0
+ ret
+.Lfunc_end98:
+ .size fcvtpu_sh_simd, .Lfunc_end98-fcvtpu_sh_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtpu_dh_simd // -- Begin function fcvtpu_dh_simd
+ .p2align 2
+	.type	fcvtpu_dh_simd,@function
+fcvtpu_dh_simd: // @fcvtpu_dh_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtpu d0, h0
+ ret
+.Lfunc_end99:
+ .size fcvtpu_dh_simd, .Lfunc_end99-fcvtpu_dh_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtpu_ds_simd // -- Begin function fcvtpu_ds_simd
+ .p2align 2
+	.type	fcvtpu_ds_simd,@function
+fcvtpu_ds_simd: // @fcvtpu_ds_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtpu d0, s0
+ ret
+.Lfunc_end100:
+ .size fcvtpu_ds_simd, .Lfunc_end100-fcvtpu_ds_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtpu_sd_simd // -- Begin function fcvtpu_sd_simd
+ .p2align 2
+	.type	fcvtpu_sd_simd,@function
+fcvtpu_sd_simd: // @fcvtpu_sd_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtpu s0, d0
+ ret
+.Lfunc_end101:
+ .size fcvtpu_sd_simd, .Lfunc_end101-fcvtpu_sd_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtpu_ss_simd // -- Begin function fcvtpu_ss_simd
+ .p2align 2
+	.type	fcvtpu_ss_simd,@function
+fcvtpu_ss_simd: // @fcvtpu_ss_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtps s0, s0
+ ret
+.Lfunc_end102:
+ .size fcvtpu_ss_simd, .Lfunc_end102-fcvtpu_ss_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtpu_dd_simd // -- Begin function fcvtpu_dd_simd
+ .p2align 2
+	.type	fcvtpu_dd_simd,@function
+fcvtpu_dd_simd: // @fcvtpu_dd_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtps d0, d0
+ ret
+.Lfunc_end103:
+ .size fcvtpu_dd_simd, .Lfunc_end103-fcvtpu_dd_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_sh_simd // -- Begin function fcvtzs_sh_simd
+ .p2align 2
+	.type	fcvtzs_sh_simd,@function
+fcvtzs_sh_simd: // @fcvtzs_sh_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs s0, h0
+ ret
+.Lfunc_end104:
+ .size fcvtzs_sh_simd, .Lfunc_end104-fcvtzs_sh_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_dh_simd // -- Begin function fcvtzs_dh_simd
+ .p2align 2
+	.type	fcvtzs_dh_simd,@function
+fcvtzs_dh_simd: // @fcvtzs_dh_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, h0
+ ret
+.Lfunc_end105:
+ .size fcvtzs_dh_simd, .Lfunc_end105-fcvtzs_dh_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_ds_simd // -- Begin function fcvtzs_ds_simd
+ .p2align 2
+	.type	fcvtzs_ds_simd,@function
+fcvtzs_ds_simd: // @fcvtzs_ds_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, s0
+ ret
+.Lfunc_end106:
+ .size fcvtzs_ds_simd, .Lfunc_end106-fcvtzs_ds_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_sd_simd // -- Begin function fcvtzs_sd_simd
+ .p2align 2
+	.type	fcvtzs_sd_simd,@function
+fcvtzs_sd_simd: // @fcvtzs_sd_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs s0, d0
+ ret
+.Lfunc_end107:
+ .size fcvtzs_sd_simd, .Lfunc_end107-fcvtzs_sd_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_ss_simd // -- Begin function fcvtzs_ss_simd
+ .p2align 2
+	.type	fcvtzs_ss_simd,@function
+fcvtzs_ss_simd: // @fcvtzs_ss_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs s0, s0
+ ret
+.Lfunc_end108:
+ .size fcvtzs_ss_simd, .Lfunc_end108-fcvtzs_ss_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_dd_simd // -- Begin function fcvtzs_dd_simd
+ .p2align 2
+	.type	fcvtzs_dd_simd,@function
+fcvtzs_dd_simd: // @fcvtzs_dd_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, d0
+ ret
+.Lfunc_end109:
+ .size fcvtzs_dd_simd, .Lfunc_end109-fcvtzs_dd_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_sh_simd // -- Begin function fcvtzu_sh_simd
+ .p2align 2
+	.type	fcvtzu_sh_simd,@function
+fcvtzu_sh_simd: // @fcvtzu_sh_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu s0, h0
+ ret
+.Lfunc_end110:
+ .size fcvtzu_sh_simd, .Lfunc_end110-fcvtzu_sh_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_dh_simd // -- Begin function fcvtzu_dh_simd
+ .p2align 2
+	.type	fcvtzu_dh_simd,@function
+fcvtzu_dh_simd: // @fcvtzu_dh_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu d0, h0
+ ret
+.Lfunc_end111:
+ .size fcvtzu_dh_simd, .Lfunc_end111-fcvtzu_dh_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_ds_simd // -- Begin function fcvtzu_ds_simd
+ .p2align 2
+	.type	fcvtzu_ds_simd,@function
+fcvtzu_ds_simd: // @fcvtzu_ds_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu d0, s0
+ ret
+.Lfunc_end112:
+ .size fcvtzu_ds_simd, .Lfunc_end112-fcvtzu_ds_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_sd_simd // -- Begin function fcvtzu_sd_simd
+ .p2align 2
+	.type	fcvtzu_sd_simd,@function
+fcvtzu_sd_simd: // @fcvtzu_sd_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu s0, d0
+ ret
+.Lfunc_end113:
+ .size fcvtzu_sd_simd, .Lfunc_end113-fcvtzu_sd_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_ss_simd // -- Begin function fcvtzu_ss_simd
+ .p2align 2
+	.type	fcvtzu_ss_simd,@function
+fcvtzu_ss_simd: // @fcvtzu_ss_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu s0, s0
+ ret
+.Lfunc_end114:
+ .size fcvtzu_ss_simd, .Lfunc_end114-fcvtzu_ss_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_dd_simd // -- Begin function fcvtzu_dd_simd
+ .p2align 2
+	.type	fcvtzu_dd_simd,@function
+fcvtzu_dd_simd: // @fcvtzu_dd_simd
+ .cfi_startproc
+// %bb.0:
+ fcvtzu d0, d0
+ ret
+.Lfunc_end115:
+ .size fcvtzu_dd_simd, .Lfunc_end115-fcvtzu_dd_simd
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_scalar_to_vector_h // -- Begin function fcvtzs_scalar_to_vector_h
+ .p2align 2
+	.type	fcvtzs_scalar_to_vector_h,@function
+fcvtzs_scalar_to_vector_h: // @fcvtzs_scalar_to_vector_h
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, h0
+ ret
+.Lfunc_end116:
+ .size fcvtzs_scalar_to_vector_h, .Lfunc_end116-fcvtzs_scalar_to_vector_h
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_scalar_to_vector_s // -- Begin function fcvtzs_scalar_to_vector_s
+ .p2align 2
+	.type	fcvtzs_scalar_to_vector_s,@function
+fcvtzs_scalar_to_vector_s: // @fcvtzs_scalar_to_vector_s
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, s0
+ ret
+.Lfunc_end117:
+ .size fcvtzs_scalar_to_vector_s, .Lfunc_end117-fcvtzs_scalar_to_vector_s
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_scalar_to_vector_d // -- Begin function fcvtzs_scalar_to_vector_d
+ .p2align 2
+	.type	fcvtzs_scalar_to_vector_d,@function
+fcvtzs_scalar_to_vector_d: // @fcvtzs_scalar_to_vector_d
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, d0
+ ret
+.Lfunc_end118:
+ .size fcvtzs_scalar_to_vector_d, .Lfunc_end118-fcvtzs_scalar_to_vector_d
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_scalar_to_vector_h // -- Begin function fcvtzu_scalar_to_vector_h
+ .p2align 2
+	.type	fcvtzu_scalar_to_vector_h,@function
+fcvtzu_scalar_to_vector_h: // @fcvtzu_scalar_to_vector_h
+ .cfi_startproc
+// %bb.0:
+ fcvtzu d0, h0
+ ret
+.Lfunc_end119:
+ .size fcvtzu_scalar_to_vector_h, .Lfunc_end119-fcvtzu_scalar_to_vector_h
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_scalar_to_vector_s // -- Begin function fcvtzu_scalar_to_vector_s
+ .p2align 2
+	.type	fcvtzu_scalar_to_vector_s,@function
+fcvtzu_scalar_to_vector_s: // @fcvtzu_scalar_to_vector_s
+ .cfi_startproc
+// %bb.0:
+ fcvtzu d0, s0
+ ret
+.Lfunc_end120:
+ .size fcvtzu_scalar_to_vector_s, .Lfunc_end120-fcvtzu_scalar_to_vector_s
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_scalar_to_vector_d // -- Begin function fcvtzu_scalar_to_vector_d
+ .p2align 2
+	.type	fcvtzu_scalar_to_vector_d,@function
+fcvtzu_scalar_to_vector_d: // @fcvtzu_scalar_to_vector_d
+ .cfi_startproc
+// %bb.0:
+ fcvtzu d0, d0
+ ret
+.Lfunc_end121:
+ .size fcvtzu_scalar_to_vector_d, .Lfunc_end121-fcvtzu_scalar_to_vector_d
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_scalar_to_vector_h_strict // -- Begin function fcvtzs_scalar_to_vector_h_strict
+ .p2align 2
+	.type	fcvtzs_scalar_to_vector_h_strict,@function
+fcvtzs_scalar_to_vector_h_strict: // @fcvtzs_scalar_to_vector_h_strict
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, h0
+ ret
+.Lfunc_end122:
+ .size fcvtzs_scalar_to_vector_h_strict, .Lfunc_end122-fcvtzs_scalar_to_vector_h_strict
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzs_scalar_to_vector_s_strict // -- Begin function fcvtzs_scalar_to_vector_s_strict
+ .p2align 2
+	.type	fcvtzs_scalar_to_vector_s_strict,@function
+fcvtzs_scalar_to_vector_s_strict: // @fcvtzs_scalar_to_vector_s_strict
+ .cfi_startproc
+// %bb.0:
+ fcvtzs d0, s0
+ ret
+.Lfunc_end123:
+ .size fcvtzs_scalar_to_vector_s_strict, .Lfunc_end123-fcvtzs_scalar_to_vector_s_strict
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_scalar_to_vector_h_strict // -- Begin function fcvtzu_scalar_to_vector_h_strict
+ .p2align 2
+	.type	fcvtzu_scalar_to_vector_h_strict,@function
+fcvtzu_scalar_to_vector_h_strict: // @fcvtzu_scalar_to_vector_h_strict
+ .cfi_startproc
+// %bb.0:
+ fcvtzu d0, h0
+ ret
+.Lfunc_end124:
+ .size fcvtzu_scalar_to_vector_h_strict, .Lfunc_end124-fcvtzu_scalar_to_vector_h_strict
+ .cfi_endproc
+ // -- End function
+ .globl fcvtzu_scalar_to_vector_s_strict // -- Begin function fcvtzu_scalar_to_vector_s_strict
+ .p2align 2
+	.type	fcvtzu_scalar_to_vector_s_strict,@function
+fcvtzu_scalar_to_vector_s_strict: // @fcvtzu_scalar_to_vector_s_strict
+ .cfi_startproc
+// %bb.0:
+ fcvtzu d0, s0
+ ret
+.Lfunc_end125:
+ .size fcvtzu_scalar_to_vector_s_strict, .Lfunc_end125-fcvtzu_scalar_to_vector_s_strict
+ .cfi_endproc
+ // -- End function
+	.section	".note.GNU-stack","",@progbits
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
index b1b9fcf8a8b3c..8b8f23a049107 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
@@ -4,7 +4,7 @@
;
-; Intriniscs
+; Intrinsics (bitcast)
;
define float @fcvtas_1s1d_simd(double %A) nounwind {
@@ -607,3 +607,335 @@ define float @fcvtzu_1s1s_simd(float %a) {
%d = bitcast i32 %vcvtah_s32_f32 to float
ret float %d
}
+
+;
+; Intrinsics (scalar_to_vector)
+;
+
+define <1 x i64> @fcvtas_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtas_1d1s_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtas.i64.f32(float %A)
+ %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+ ret <1 x i64> %vec
+}
+
+
+define <1 x i64> @fcvtas_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtas_1d1h_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f16(half %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+ ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtas_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtas_1d1d_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+ ret <1 x i64> %vec
+}
+
+
+
+define <1 x i64> @fcvtau_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtau_1d1s_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtau.i64.f32(float %A)
+ %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+ ret <1 x i64> %vec
+}
+
+
+define <1 x i64> @fcvtau_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtau_1d1h_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f16(half %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+ ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtau_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtau_1d1d_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+ ret <1 x i64> %vec
+}
+
+
+
+define <1 x i64> @fcvtms_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtms_1d1s_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtms.i64.f32(float %A)
+ %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+ ret <1 x i64> %vec
+}
+
+
+define <1 x i64> @fcvtms_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtms_1d1h_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+ ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtms_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtms_1d1d_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+ ret <1 x i64> %vec
+}
+
+
+
+define <1 x i64> @fcvtmu_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtmu_1d1s_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtmu d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtmu.i64.f32(float %A)
+ %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+ ret <1 x i64> %vec
+}
+
+
+define <1 x i64> @fcvtmu_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtmu_1d1h_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtmu d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+ ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtmu_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtmu_1d1d_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtmu d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+ ret <1 x i64> %vec
+}
+
+
+
+define <1 x i64> @fcvtns_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtns_1d1s_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtns.i64.f32(float %A)
+ %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+ ret <1 x i64> %vec
+}
+
+
+define <1 x i64> @fcvtns_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtns_1d1h_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f16(half %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+ ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtns_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtns_1d1d_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+ ret <1 x i64> %vec
+}
+
+
+
+define <1 x i64> @fcvtnu_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtnu_1d1s_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtnu.i64.f32(float %A)
+ %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+ ret <1 x i64> %vec
+}
+
+
+define <1 x i64> @fcvtnu_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtnu_1d1h_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+ ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtnu_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtnu_1d1d_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+ ret <1 x i64> %vec
+}
+
+
+
+define <1 x i64> @fcvtps_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtps_1d1s_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtps.i64.f32(float %A)
+ %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+ ret <1 x i64> %vec
+}
+
+
+define <1 x i64> @fcvtps_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtps_1d1h_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f16(half %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+ ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtps_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtps_1d1d_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+ ret <1 x i64> %vec
+}
+
+
+
+define <1 x i64> @fcvtpu_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtpu_1d1s_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtpu.i64.f32(float %A)
+ %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+ ret <1 x i64> %vec
+}
+
+
+define <1 x i64> @fcvtpu_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtpu_1d1h_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+ ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtpu_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtpu_1d1d_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+ ret <1 x i64> %vec
+}
+
+
+
+define <1 x i64> @fcvtzs_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzs_1d1s_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %A)
+ %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+ ret <1 x i64> %vec
+}
+
+
+define <1 x i64> @fcvtzs_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzs_1d1h_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+ ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzs_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzs_1d1d_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+ ret <1 x i64> %vec
+}
+
+
+
+define <1 x i64> @fcvtzu_1d1s_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzu_1d1s_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float %A)
+ %vec = insertelement <1 x i64> poison, i64 %i, i32 0
+ ret <1 x i64> %vec
+}
+
+
+define <1 x i64> @fcvtzu_1d1h_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzu_1d1h_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
+ ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzu_1d1d_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzu_1d1d_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double %a)
+ %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
+ ret <1 x i64> %vec
+}
diff --git a/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll b/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
index b580c4921fb66..35f62e52ffd76 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
@@ -21,8 +21,7 @@ define double @bar(ptr %iVals, ptr %fVals, ptr %dVals) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr d0, [x2, #128]
; CHECK-NEXT: frinti d0, d0
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fcvtzs d0, d0
; CHECK-NEXT: sri d0, d0, #1
; CHECK-NEXT: scvtf.2d v0, v0, #1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
index d8f370884c84a..c2f39fb14ee24 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -980,18 +980,11 @@ define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
}
define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
-; CHECK-SD-LABEL: test_bitcastv8i8tov1f64:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: neg v0.8b, v0.8b
-; CHECK-SD-NEXT: fcvtzs x8, d0
-; CHECK-SD-NEXT: fmov d0, x8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_bitcastv8i8tov1f64:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: neg v0.8b, v0.8b
-; CHECK-GI-NEXT: fcvtzs d0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_bitcastv8i8tov1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: neg v0.8b, v0.8b
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: ret
%sub.i = sub <8 x i8> zeroinitializer, %a
%1 = bitcast <8 x i8> %sub.i to <1 x double>
%vcvt.i = fptosi <1 x double> %1 to <1 x i64>
@@ -999,18 +992,11 @@ define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
}
define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
-; CHECK-SD-LABEL: test_bitcastv4i16tov1f64:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: neg v0.4h, v0.4h
-; CHECK-SD-NEXT: fcvtzs x8, d0
-; CHECK-SD-NEXT: fmov d0, x8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_bitcastv4i16tov1f64:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: neg v0.4h, v0.4h
-; CHECK-GI-NEXT: fcvtzs d0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_bitcastv4i16tov1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: neg v0.4h, v0.4h
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: ret
%sub.i = sub <4 x i16> zeroinitializer, %a
%1 = bitcast <4 x i16> %sub.i to <1 x double>
%vcvt.i = fptosi <1 x double> %1 to <1 x i64>
@@ -1018,18 +1004,11 @@ define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
}
define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
-; CHECK-SD-LABEL: test_bitcastv2i32tov1f64:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: neg v0.2s, v0.2s
-; CHECK-SD-NEXT: fcvtzs x8, d0
-; CHECK-SD-NEXT: fmov d0, x8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_bitcastv2i32tov1f64:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: neg v0.2s, v0.2s
-; CHECK-GI-NEXT: fcvtzs d0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_bitcastv2i32tov1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: neg v0.2s, v0.2s
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: ret
%sub.i = sub <2 x i32> zeroinitializer, %a
%1 = bitcast <2 x i32> %sub.i to <1 x double>
%vcvt.i = fptosi <1 x double> %1 to <1 x i64>
@@ -1040,8 +1019,7 @@ define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
; CHECK-SD-LABEL: test_bitcastv1i64tov1f64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: neg d0, d0
-; CHECK-SD-NEXT: fcvtzs x8, d0
-; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: fcvtzs d0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_bitcastv1i64tov1f64:
@@ -1061,8 +1039,7 @@ define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
; CHECK-LABEL: test_bitcastv2f32tov1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fneg v0.2s, v0.2s
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fcvtzs d0, d0
; CHECK-NEXT: ret
%sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
%1 = bitcast <2 x float> %sub.i to <1 x double>
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
index 1e0cfa0201263..dcb3b9b24627b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -359,16 +359,10 @@ define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind {
; FIXME: Generate "fcvtzs d0, d0"?
define <1 x i64> @fcvtzs_1d(<1 x double> %A) nounwind {
-; CHECK-SD-LABEL: fcvtzs_1d:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fcvtzs x8, d0
-; CHECK-SD-NEXT: fmov d0, x8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fcvtzs_1d:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fcvtzs d0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fcvtzs_1d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: ret
%tmp3 = fptosi <1 x double> %A to <1 x i64>
ret <1 x i64> %tmp3
}
@@ -443,16 +437,10 @@ define <2 x i64> @fcvtzu_2d(<2 x double> %A) nounwind {
; FIXME: Generate "fcvtzu d0, d0"?
define <1 x i64> @fcvtzu_1d(<1 x double> %A) nounwind {
-; CHECK-SD-LABEL: fcvtzu_1d:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fcvtzu x8, d0
-; CHECK-SD-NEXT: fmov d0, x8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fcvtzu_1d:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fcvtzu d0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fcvtzu_1d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, d0
+; CHECK-NEXT: ret
%tmp3 = fptoui <1 x double> %A to <1 x i64>
ret <1 x i64> %tmp3
}
diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
index c3da22757f1d2..0b05e00a1b0db 100644
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
@@ -717,8 +717,7 @@ define <1 x i32> @fptoui_v1i32_v1f64(<1 x double> %x) #0 {
define <1 x i64> @fptosi_v1i64_v1f64(<1 x double> %x) #0 {
; CHECK-LABEL: fptosi_v1i64_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fcvtzs d0, d0
; CHECK-NEXT: ret
%val = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
ret <1 x i64> %val
@@ -727,8 +726,7 @@ define <1 x i64> @fptosi_v1i64_v1f64(<1 x double> %x) #0 {
define <1 x i64> @fptoui_v1i64_v1f64(<1 x double> %x) #0 {
; CHECK-LABEL: fptoui_v1i64_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu x8, d0
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fcvtzu d0, d0
; CHECK-NEXT: ret
%val = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
ret <1 x i64> %val
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
index c8f6d98f5a63f..312d158cfb2b6 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
@@ -815,8 +815,7 @@ define void @fcvtzu_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
define <1 x i64> @fcvtzu_v1f64_v1i64(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f64_v1i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu x8, d0
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fcvtzu d0, d0
; CHECK-NEXT: ret
%res = fptoui <1 x double> %op1 to <1 x i64>
ret <1 x i64> %res
@@ -1710,8 +1709,7 @@ define void @fcvtzs_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
define <1 x i64> @fcvtzs_v1f64_v1i64(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v1f64_v1i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fcvtzs d0, d0
; CHECK-NEXT: ret
%res = fptosi <1 x double> %op1 to <1 x i64>
ret <1 x i64> %res
>From 0927e44a9c2247935ae10e4e368a4be6f8aae8c0 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Thu, 18 Dec 2025 13:05:46 +0000
Subject: [PATCH 2/8] FIx
---
.../CodeGen/AArch64/arm64-cvt-simd-fptoi.s | 1515 -----------------
.../AArch64/arm64-cvt-simd-intrinsics.ll | 2 +-
2 files changed, 1 insertion(+), 1516 deletions(-)
delete mode 100644 llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s
deleted file mode 100644
index 0850b306e8c79..0000000000000
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s
+++ /dev/null
@@ -1,1515 +0,0 @@
- .file "arm64-cvt-simd-fptoi.ll"
- .text
- .globl test_fptosi_f16_i32_simd // -- Begin function test_fptosi_f16_i32_simd
- .p2align 2
- .type test_fptosi_f16_i32_simd, at function
-test_fptosi_f16_i32_simd: // @test_fptosi_f16_i32_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs s0, h0
- ret
-.Lfunc_end0:
- .size test_fptosi_f16_i32_simd, .Lfunc_end0-test_fptosi_f16_i32_simd
- .cfi_endproc
- // -- End function
- .globl test_fptosi_f16_i64_simd // -- Begin function test_fptosi_f16_i64_simd
- .p2align 2
- .type test_fptosi_f16_i64_simd, at function
-test_fptosi_f16_i64_simd: // @test_fptosi_f16_i64_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, h0
- ret
-.Lfunc_end1:
- .size test_fptosi_f16_i64_simd, .Lfunc_end1-test_fptosi_f16_i64_simd
- .cfi_endproc
- // -- End function
- .globl test_fptosi_f64_i32_simd // -- Begin function test_fptosi_f64_i32_simd
- .p2align 2
- .type test_fptosi_f64_i32_simd, at function
-test_fptosi_f64_i32_simd: // @test_fptosi_f64_i32_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs s0, d0
- ret
-.Lfunc_end2:
- .size test_fptosi_f64_i32_simd, .Lfunc_end2-test_fptosi_f64_i32_simd
- .cfi_endproc
- // -- End function
- .globl test_fptosi_f32_i64_simd // -- Begin function test_fptosi_f32_i64_simd
- .p2align 2
- .type test_fptosi_f32_i64_simd, at function
-test_fptosi_f32_i64_simd: // @test_fptosi_f32_i64_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, s0
- ret
-.Lfunc_end3:
- .size test_fptosi_f32_i64_simd, .Lfunc_end3-test_fptosi_f32_i64_simd
- .cfi_endproc
- // -- End function
- .globl test_fptosi_f64_i64_simd // -- Begin function test_fptosi_f64_i64_simd
- .p2align 2
- .type test_fptosi_f64_i64_simd, at function
-test_fptosi_f64_i64_simd: // @test_fptosi_f64_i64_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, d0
- ret
-.Lfunc_end4:
- .size test_fptosi_f64_i64_simd, .Lfunc_end4-test_fptosi_f64_i64_simd
- .cfi_endproc
- // -- End function
- .globl test_fptosi_f32_i32_simd // -- Begin function test_fptosi_f32_i32_simd
- .p2align 2
- .type test_fptosi_f32_i32_simd, at function
-test_fptosi_f32_i32_simd: // @test_fptosi_f32_i32_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs s0, s0
- ret
-.Lfunc_end5:
- .size test_fptosi_f32_i32_simd, .Lfunc_end5-test_fptosi_f32_i32_simd
- .cfi_endproc
- // -- End function
- .globl test_fptoui_f16_i32_simd // -- Begin function test_fptoui_f16_i32_simd
- .p2align 2
- .type test_fptoui_f16_i32_simd, at function
-test_fptoui_f16_i32_simd: // @test_fptoui_f16_i32_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu s0, h0
- ret
-.Lfunc_end6:
- .size test_fptoui_f16_i32_simd, .Lfunc_end6-test_fptoui_f16_i32_simd
- .cfi_endproc
- // -- End function
- .globl test_fptoui_f16_i64_simd // -- Begin function test_fptoui_f16_i64_simd
- .p2align 2
- .type test_fptoui_f16_i64_simd, at function
-test_fptoui_f16_i64_simd: // @test_fptoui_f16_i64_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu d0, h0
- ret
-.Lfunc_end7:
- .size test_fptoui_f16_i64_simd, .Lfunc_end7-test_fptoui_f16_i64_simd
- .cfi_endproc
- // -- End function
- .globl test_fptoui_f64_i32_simd // -- Begin function test_fptoui_f64_i32_simd
- .p2align 2
- .type test_fptoui_f64_i32_simd, at function
-test_fptoui_f64_i32_simd: // @test_fptoui_f64_i32_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu s0, d0
- ret
-.Lfunc_end8:
- .size test_fptoui_f64_i32_simd, .Lfunc_end8-test_fptoui_f64_i32_simd
- .cfi_endproc
- // -- End function
- .globl test_fptoui_f32_i64_simd // -- Begin function test_fptoui_f32_i64_simd
- .p2align 2
- .type test_fptoui_f32_i64_simd, at function
-test_fptoui_f32_i64_simd: // @test_fptoui_f32_i64_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu d0, s0
- ret
-.Lfunc_end9:
- .size test_fptoui_f32_i64_simd, .Lfunc_end9-test_fptoui_f32_i64_simd
- .cfi_endproc
- // -- End function
- .globl test_fptoui_f64_i64_simd // -- Begin function test_fptoui_f64_i64_simd
- .p2align 2
- .type test_fptoui_f64_i64_simd, at function
-test_fptoui_f64_i64_simd: // @test_fptoui_f64_i64_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu d0, d0
- ret
-.Lfunc_end10:
- .size test_fptoui_f64_i64_simd, .Lfunc_end10-test_fptoui_f64_i64_simd
- .cfi_endproc
- // -- End function
- .globl test_fptoui_f32_i32_simd // -- Begin function test_fptoui_f32_i32_simd
- .p2align 2
- .type test_fptoui_f32_i32_simd, at function
-test_fptoui_f32_i32_simd: // @test_fptoui_f32_i32_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu s0, s0
- ret
-.Lfunc_end11:
- .size test_fptoui_f32_i32_simd, .Lfunc_end11-test_fptoui_f32_i32_simd
- .cfi_endproc
- // -- End function
- .globl fptosi_i32_f16_simd // -- Begin function fptosi_i32_f16_simd
- .p2align 2
- .type fptosi_i32_f16_simd, at function
-fptosi_i32_f16_simd: // @fptosi_i32_f16_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs s0, h0
- ret
-.Lfunc_end12:
- .size fptosi_i32_f16_simd, .Lfunc_end12-fptosi_i32_f16_simd
- .cfi_endproc
- // -- End function
- .globl fptosi_i64_f16_simd // -- Begin function fptosi_i64_f16_simd
- .p2align 2
- .type fptosi_i64_f16_simd, at function
-fptosi_i64_f16_simd: // @fptosi_i64_f16_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, h0
- ret
-.Lfunc_end13:
- .size fptosi_i64_f16_simd, .Lfunc_end13-fptosi_i64_f16_simd
- .cfi_endproc
- // -- End function
- .globl fptosi_i64_f32_simd // -- Begin function fptosi_i64_f32_simd
- .p2align 2
- .type fptosi_i64_f32_simd, at function
-fptosi_i64_f32_simd: // @fptosi_i64_f32_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, s0
- ret
-.Lfunc_end14:
- .size fptosi_i64_f32_simd, .Lfunc_end14-fptosi_i64_f32_simd
- .cfi_endproc
- // -- End function
- .globl fptosi_i32_f64_simd // -- Begin function fptosi_i32_f64_simd
- .p2align 2
- .type fptosi_i32_f64_simd, at function
-fptosi_i32_f64_simd: // @fptosi_i32_f64_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs s0, d0
- ret
-.Lfunc_end15:
- .size fptosi_i32_f64_simd, .Lfunc_end15-fptosi_i32_f64_simd
- .cfi_endproc
- // -- End function
- .globl fptosi_i64_f64_simd // -- Begin function fptosi_i64_f64_simd
- .p2align 2
- .type fptosi_i64_f64_simd, at function
-fptosi_i64_f64_simd: // @fptosi_i64_f64_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, d0
- ret
-.Lfunc_end16:
- .size fptosi_i64_f64_simd, .Lfunc_end16-fptosi_i64_f64_simd
- .cfi_endproc
- // -- End function
- .globl fptosi_i32_f32_simd // -- Begin function fptosi_i32_f32_simd
- .p2align 2
- .type fptosi_i32_f32_simd, at function
-fptosi_i32_f32_simd: // @fptosi_i32_f32_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs s0, s0
- ret
-.Lfunc_end17:
- .size fptosi_i32_f32_simd, .Lfunc_end17-fptosi_i32_f32_simd
- .cfi_endproc
- // -- End function
- .globl fptoui_i32_f16_simd // -- Begin function fptoui_i32_f16_simd
- .p2align 2
- .type fptoui_i32_f16_simd, at function
-fptoui_i32_f16_simd: // @fptoui_i32_f16_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu s0, h0
- ret
-.Lfunc_end18:
- .size fptoui_i32_f16_simd, .Lfunc_end18-fptoui_i32_f16_simd
- .cfi_endproc
- // -- End function
- .globl fptoui_i64_f16_simd // -- Begin function fptoui_i64_f16_simd
- .p2align 2
- .type fptoui_i64_f16_simd, at function
-fptoui_i64_f16_simd: // @fptoui_i64_f16_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu d0, h0
- ret
-.Lfunc_end19:
- .size fptoui_i64_f16_simd, .Lfunc_end19-fptoui_i64_f16_simd
- .cfi_endproc
- // -- End function
- .globl fptoui_i64_f32_simd // -- Begin function fptoui_i64_f32_simd
- .p2align 2
- .type fptoui_i64_f32_simd, at function
-fptoui_i64_f32_simd: // @fptoui_i64_f32_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu d0, s0
- ret
-.Lfunc_end20:
- .size fptoui_i64_f32_simd, .Lfunc_end20-fptoui_i64_f32_simd
- .cfi_endproc
- // -- End function
- .globl fptoui_i32_f64_simd // -- Begin function fptoui_i32_f64_simd
- .p2align 2
- .type fptoui_i32_f64_simd, at function
-fptoui_i32_f64_simd: // @fptoui_i32_f64_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu s0, d0
- ret
-.Lfunc_end21:
- .size fptoui_i32_f64_simd, .Lfunc_end21-fptoui_i32_f64_simd
- .cfi_endproc
- // -- End function
- .globl fptoui_i64_f64_simd // -- Begin function fptoui_i64_f64_simd
- .p2align 2
- .type fptoui_i64_f64_simd, at function
-fptoui_i64_f64_simd: // @fptoui_i64_f64_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu d0, d0
- ret
-.Lfunc_end22:
- .size fptoui_i64_f64_simd, .Lfunc_end22-fptoui_i64_f64_simd
- .cfi_endproc
- // -- End function
- .globl fptoui_i32_f32_simd // -- Begin function fptoui_i32_f32_simd
- .p2align 2
- .type fptoui_i32_f32_simd, at function
-fptoui_i32_f32_simd: // @fptoui_i32_f32_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu s0, s0
- ret
-.Lfunc_end23:
- .size fptoui_i32_f32_simd, .Lfunc_end23-fptoui_i32_f32_simd
- .cfi_endproc
- // -- End function
- .globl fcvtas_ds_round_simd // -- Begin function fcvtas_ds_round_simd
- .p2align 2
- .type fcvtas_ds_round_simd, at function
-fcvtas_ds_round_simd: // @fcvtas_ds_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtas d0, s0
- ret
-.Lfunc_end24:
- .size fcvtas_ds_round_simd, .Lfunc_end24-fcvtas_ds_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtas_sd_round_simd // -- Begin function fcvtas_sd_round_simd
- .p2align 2
- .type fcvtas_sd_round_simd, at function
-fcvtas_sd_round_simd: // @fcvtas_sd_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtas s0, d0
- ret
-.Lfunc_end25:
- .size fcvtas_sd_round_simd, .Lfunc_end25-fcvtas_sd_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtas_ss_round_simd // -- Begin function fcvtas_ss_round_simd
- .p2align 2
- .type fcvtas_ss_round_simd, at function
-fcvtas_ss_round_simd: // @fcvtas_ss_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtas s0, s0
- ret
-.Lfunc_end26:
- .size fcvtas_ss_round_simd, .Lfunc_end26-fcvtas_ss_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtas_dd_round_simd // -- Begin function fcvtas_dd_round_simd
- .p2align 2
- .type fcvtas_dd_round_simd, at function
-fcvtas_dd_round_simd: // @fcvtas_dd_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtas d0, d0
- ret
-.Lfunc_end27:
- .size fcvtas_dd_round_simd, .Lfunc_end27-fcvtas_dd_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtau_ds_round_simd // -- Begin function fcvtau_ds_round_simd
- .p2align 2
- .type fcvtau_ds_round_simd, at function
-fcvtau_ds_round_simd: // @fcvtau_ds_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtau d0, s0
- ret
-.Lfunc_end28:
- .size fcvtau_ds_round_simd, .Lfunc_end28-fcvtau_ds_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtau_sd_round_simd // -- Begin function fcvtau_sd_round_simd
- .p2align 2
- .type fcvtau_sd_round_simd, at function
-fcvtau_sd_round_simd: // @fcvtau_sd_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtau s0, d0
- ret
-.Lfunc_end29:
- .size fcvtau_sd_round_simd, .Lfunc_end29-fcvtau_sd_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtau_ss_round_simd // -- Begin function fcvtau_ss_round_simd
- .p2align 2
- .type fcvtau_ss_round_simd, at function
-fcvtau_ss_round_simd: // @fcvtau_ss_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtas s0, s0
- ret
-.Lfunc_end30:
- .size fcvtau_ss_round_simd, .Lfunc_end30-fcvtau_ss_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtau_dd_round_simd // -- Begin function fcvtau_dd_round_simd
- .p2align 2
- .type fcvtau_dd_round_simd, at function
-fcvtau_dd_round_simd: // @fcvtau_dd_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtas d0, d0
- ret
-.Lfunc_end31:
- .size fcvtau_dd_round_simd, .Lfunc_end31-fcvtau_dd_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtms_ds_round_simd // -- Begin function fcvtms_ds_round_simd
- .p2align 2
- .type fcvtms_ds_round_simd, at function
-fcvtms_ds_round_simd: // @fcvtms_ds_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtms d0, s0
- ret
-.Lfunc_end32:
- .size fcvtms_ds_round_simd, .Lfunc_end32-fcvtms_ds_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtms_sd_round_simd // -- Begin function fcvtms_sd_round_simd
- .p2align 2
- .type fcvtms_sd_round_simd, at function
-fcvtms_sd_round_simd: // @fcvtms_sd_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtms s0, d0
- ret
-.Lfunc_end33:
- .size fcvtms_sd_round_simd, .Lfunc_end33-fcvtms_sd_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtms_ss_round_simd // -- Begin function fcvtms_ss_round_simd
- .p2align 2
- .type fcvtms_ss_round_simd, at function
-fcvtms_ss_round_simd: // @fcvtms_ss_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtms s0, s0
- ret
-.Lfunc_end34:
- .size fcvtms_ss_round_simd, .Lfunc_end34-fcvtms_ss_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtms_dd_round_simd // -- Begin function fcvtms_dd_round_simd
- .p2align 2
- .type fcvtms_dd_round_simd, at function
-fcvtms_dd_round_simd: // @fcvtms_dd_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtms d0, d0
- ret
-.Lfunc_end35:
- .size fcvtms_dd_round_simd, .Lfunc_end35-fcvtms_dd_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtmu_ds_round_simd // -- Begin function fcvtmu_ds_round_simd
- .p2align 2
- .type fcvtmu_ds_round_simd, at function
-fcvtmu_ds_round_simd: // @fcvtmu_ds_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtmu d0, s0
- ret
-.Lfunc_end36:
- .size fcvtmu_ds_round_simd, .Lfunc_end36-fcvtmu_ds_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtmu_sd_round_simd // -- Begin function fcvtmu_sd_round_simd
- .p2align 2
- .type fcvtmu_sd_round_simd, at function
-fcvtmu_sd_round_simd: // @fcvtmu_sd_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtmu s0, d0
- ret
-.Lfunc_end37:
- .size fcvtmu_sd_round_simd, .Lfunc_end37-fcvtmu_sd_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtmu_ss_round_simd // -- Begin function fcvtmu_ss_round_simd
- .p2align 2
- .type fcvtmu_ss_round_simd, at function
-fcvtmu_ss_round_simd: // @fcvtmu_ss_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtms s0, s0
- ret
-.Lfunc_end38:
- .size fcvtmu_ss_round_simd, .Lfunc_end38-fcvtmu_ss_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtmu_dd_round_simd // -- Begin function fcvtmu_dd_round_simd
- .p2align 2
- .type fcvtmu_dd_round_simd, at function
-fcvtmu_dd_round_simd: // @fcvtmu_dd_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtms d0, d0
- ret
-.Lfunc_end39:
- .size fcvtmu_dd_round_simd, .Lfunc_end39-fcvtmu_dd_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtps_ds_round_simd // -- Begin function fcvtps_ds_round_simd
- .p2align 2
- .type fcvtps_ds_round_simd, at function
-fcvtps_ds_round_simd: // @fcvtps_ds_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtps d0, s0
- ret
-.Lfunc_end40:
- .size fcvtps_ds_round_simd, .Lfunc_end40-fcvtps_ds_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtps_sd_round_simd // -- Begin function fcvtps_sd_round_simd
- .p2align 2
- .type fcvtps_sd_round_simd, at function
-fcvtps_sd_round_simd: // @fcvtps_sd_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtps s0, d0
- ret
-.Lfunc_end41:
- .size fcvtps_sd_round_simd, .Lfunc_end41-fcvtps_sd_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtps_ss_round_simd // -- Begin function fcvtps_ss_round_simd
- .p2align 2
- .type fcvtps_ss_round_simd, at function
-fcvtps_ss_round_simd: // @fcvtps_ss_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtps s0, s0
- ret
-.Lfunc_end42:
- .size fcvtps_ss_round_simd, .Lfunc_end42-fcvtps_ss_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtps_dd_round_simd // -- Begin function fcvtps_dd_round_simd
- .p2align 2
- .type fcvtps_dd_round_simd, at function
-fcvtps_dd_round_simd: // @fcvtps_dd_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtps d0, d0
- ret
-.Lfunc_end43:
- .size fcvtps_dd_round_simd, .Lfunc_end43-fcvtps_dd_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtpu_ds_round_simd // -- Begin function fcvtpu_ds_round_simd
- .p2align 2
- .type fcvtpu_ds_round_simd, at function
-fcvtpu_ds_round_simd: // @fcvtpu_ds_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtpu d0, s0
- ret
-.Lfunc_end44:
- .size fcvtpu_ds_round_simd, .Lfunc_end44-fcvtpu_ds_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtpu_sd_round_simd // -- Begin function fcvtpu_sd_round_simd
- .p2align 2
- .type fcvtpu_sd_round_simd, at function
-fcvtpu_sd_round_simd: // @fcvtpu_sd_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtpu s0, d0
- ret
-.Lfunc_end45:
- .size fcvtpu_sd_round_simd, .Lfunc_end45-fcvtpu_sd_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtpu_ss_round_simd // -- Begin function fcvtpu_ss_round_simd
- .p2align 2
- .type fcvtpu_ss_round_simd, at function
-fcvtpu_ss_round_simd: // @fcvtpu_ss_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtps s0, s0
- ret
-.Lfunc_end46:
- .size fcvtpu_ss_round_simd, .Lfunc_end46-fcvtpu_ss_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtpu_dd_round_simd // -- Begin function fcvtpu_dd_round_simd
- .p2align 2
- .type fcvtpu_dd_round_simd, at function
-fcvtpu_dd_round_simd: // @fcvtpu_dd_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtps d0, d0
- ret
-.Lfunc_end47:
- .size fcvtpu_dd_round_simd, .Lfunc_end47-fcvtpu_dd_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzs_ds_round_simd // -- Begin function fcvtzs_ds_round_simd
- .p2align 2
- .type fcvtzs_ds_round_simd, at function
-fcvtzs_ds_round_simd: // @fcvtzs_ds_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, s0
- ret
-.Lfunc_end48:
- .size fcvtzs_ds_round_simd, .Lfunc_end48-fcvtzs_ds_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzs_sd_round_simd // -- Begin function fcvtzs_sd_round_simd
- .p2align 2
- .type fcvtzs_sd_round_simd, at function
-fcvtzs_sd_round_simd: // @fcvtzs_sd_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs s0, d0
- ret
-.Lfunc_end49:
- .size fcvtzs_sd_round_simd, .Lfunc_end49-fcvtzs_sd_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzs_ss_round_simd // -- Begin function fcvtzs_ss_round_simd
- .p2align 2
- .type fcvtzs_ss_round_simd, at function
-fcvtzs_ss_round_simd: // @fcvtzs_ss_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs s0, s0
- ret
-.Lfunc_end50:
- .size fcvtzs_ss_round_simd, .Lfunc_end50-fcvtzs_ss_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzs_dd_round_simd // -- Begin function fcvtzs_dd_round_simd
- .p2align 2
- .type fcvtzs_dd_round_simd, at function
-fcvtzs_dd_round_simd: // @fcvtzs_dd_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, d0
- ret
-.Lfunc_end51:
- .size fcvtzs_dd_round_simd, .Lfunc_end51-fcvtzs_dd_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzu_ds_round_simd // -- Begin function fcvtzu_ds_round_simd
- .p2align 2
- .type fcvtzu_ds_round_simd, at function
-fcvtzu_ds_round_simd: // @fcvtzu_ds_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu d0, s0
- ret
-.Lfunc_end52:
- .size fcvtzu_ds_round_simd, .Lfunc_end52-fcvtzu_ds_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzu_sd_round_simd // -- Begin function fcvtzu_sd_round_simd
- .p2align 2
- .type fcvtzu_sd_round_simd, at function
-fcvtzu_sd_round_simd: // @fcvtzu_sd_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu s0, d0
- ret
-.Lfunc_end53:
- .size fcvtzu_sd_round_simd, .Lfunc_end53-fcvtzu_sd_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzu_ss_round_simd // -- Begin function fcvtzu_ss_round_simd
- .p2align 2
- .type fcvtzu_ss_round_simd, at function
-fcvtzu_ss_round_simd: // @fcvtzu_ss_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs s0, s0
- ret
-.Lfunc_end54:
- .size fcvtzu_ss_round_simd, .Lfunc_end54-fcvtzu_ss_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzu_dd_round_simd // -- Begin function fcvtzu_dd_round_simd
- .p2align 2
- .type fcvtzu_dd_round_simd, at function
-fcvtzu_dd_round_simd: // @fcvtzu_dd_round_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, d0
- ret
-.Lfunc_end55:
- .size fcvtzu_dd_round_simd, .Lfunc_end55-fcvtzu_dd_round_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzs_sh_sat_simd // -- Begin function fcvtzs_sh_sat_simd
- .p2align 2
- .type fcvtzs_sh_sat_simd, at function
-fcvtzs_sh_sat_simd: // @fcvtzs_sh_sat_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs s0, h0
- ret
-.Lfunc_end56:
- .size fcvtzs_sh_sat_simd, .Lfunc_end56-fcvtzs_sh_sat_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzs_dh_sat_simd // -- Begin function fcvtzs_dh_sat_simd
- .p2align 2
- .type fcvtzs_dh_sat_simd, at function
-fcvtzs_dh_sat_simd: // @fcvtzs_dh_sat_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, h0
- ret
-.Lfunc_end57:
- .size fcvtzs_dh_sat_simd, .Lfunc_end57-fcvtzs_dh_sat_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzs_ds_sat_simd // -- Begin function fcvtzs_ds_sat_simd
- .p2align 2
- .type fcvtzs_ds_sat_simd, at function
-fcvtzs_ds_sat_simd: // @fcvtzs_ds_sat_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, s0
- ret
-.Lfunc_end58:
- .size fcvtzs_ds_sat_simd, .Lfunc_end58-fcvtzs_ds_sat_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzs_sd_sat_simd // -- Begin function fcvtzs_sd_sat_simd
- .p2align 2
- .type fcvtzs_sd_sat_simd, at function
-fcvtzs_sd_sat_simd: // @fcvtzs_sd_sat_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs s0, d0
- ret
-.Lfunc_end59:
- .size fcvtzs_sd_sat_simd, .Lfunc_end59-fcvtzs_sd_sat_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzs_ss_sat_simd // -- Begin function fcvtzs_ss_sat_simd
- .p2align 2
- .type fcvtzs_ss_sat_simd, at function
-fcvtzs_ss_sat_simd: // @fcvtzs_ss_sat_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs s0, s0
- ret
-.Lfunc_end60:
- .size fcvtzs_ss_sat_simd, .Lfunc_end60-fcvtzs_ss_sat_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzs_dd_sat_simd // -- Begin function fcvtzs_dd_sat_simd
- .p2align 2
- .type fcvtzs_dd_sat_simd, at function
-fcvtzs_dd_sat_simd: // @fcvtzs_dd_sat_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, d0
- ret
-.Lfunc_end61:
- .size fcvtzs_dd_sat_simd, .Lfunc_end61-fcvtzs_dd_sat_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzu_sh_sat_simd // -- Begin function fcvtzu_sh_sat_simd
- .p2align 2
- .type fcvtzu_sh_sat_simd, at function
-fcvtzu_sh_sat_simd: // @fcvtzu_sh_sat_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu s0, h0
- ret
-.Lfunc_end62:
- .size fcvtzu_sh_sat_simd, .Lfunc_end62-fcvtzu_sh_sat_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzu_dh_sat_simd // -- Begin function fcvtzu_dh_sat_simd
- .p2align 2
- .type fcvtzu_dh_sat_simd, at function
-fcvtzu_dh_sat_simd: // @fcvtzu_dh_sat_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu d0, h0
- ret
-.Lfunc_end63:
- .size fcvtzu_dh_sat_simd, .Lfunc_end63-fcvtzu_dh_sat_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzu_ds_sat_simd // -- Begin function fcvtzu_ds_sat_simd
- .p2align 2
- .type fcvtzu_ds_sat_simd, at function
-fcvtzu_ds_sat_simd: // @fcvtzu_ds_sat_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu d0, s0
- ret
-.Lfunc_end64:
- .size fcvtzu_ds_sat_simd, .Lfunc_end64-fcvtzu_ds_sat_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzu_sd_sat_simd // -- Begin function fcvtzu_sd_sat_simd
- .p2align 2
- .type fcvtzu_sd_sat_simd, at function
-fcvtzu_sd_sat_simd: // @fcvtzu_sd_sat_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu s0, d0
- ret
-.Lfunc_end65:
- .size fcvtzu_sd_sat_simd, .Lfunc_end65-fcvtzu_sd_sat_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzu_ss_sat_simd // -- Begin function fcvtzu_ss_sat_simd
- .p2align 2
- .type fcvtzu_ss_sat_simd, at function
-fcvtzu_ss_sat_simd: // @fcvtzu_ss_sat_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs s0, s0
- ret
-.Lfunc_end66:
- .size fcvtzu_ss_sat_simd, .Lfunc_end66-fcvtzu_ss_sat_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzu_dd_sat_simd // -- Begin function fcvtzu_dd_sat_simd
- .p2align 2
- .type fcvtzu_dd_sat_simd, at function
-fcvtzu_dd_sat_simd: // @fcvtzu_dd_sat_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, d0
- ret
-.Lfunc_end67:
- .size fcvtzu_dd_sat_simd, .Lfunc_end67-fcvtzu_dd_sat_simd
- .cfi_endproc
- // -- End function
- .globl fcvtas_sh_simd // -- Begin function fcvtas_sh_simd
- .p2align 2
- .type fcvtas_sh_simd, at function
-fcvtas_sh_simd: // @fcvtas_sh_simd
- .cfi_startproc
-// %bb.0:
- fcvtas s0, h0
- ret
-.Lfunc_end68:
- .size fcvtas_sh_simd, .Lfunc_end68-fcvtas_sh_simd
- .cfi_endproc
- // -- End function
- .globl fcvtas_dh_simd // -- Begin function fcvtas_dh_simd
- .p2align 2
- .type fcvtas_dh_simd, at function
-fcvtas_dh_simd: // @fcvtas_dh_simd
- .cfi_startproc
-// %bb.0:
- fcvtas d0, h0
- ret
-.Lfunc_end69:
- .size fcvtas_dh_simd, .Lfunc_end69-fcvtas_dh_simd
- .cfi_endproc
- // -- End function
- .globl fcvtas_ds_simd // -- Begin function fcvtas_ds_simd
- .p2align 2
- .type fcvtas_ds_simd, at function
-fcvtas_ds_simd: // @fcvtas_ds_simd
- .cfi_startproc
-// %bb.0:
- fcvtas d0, s0
- ret
-.Lfunc_end70:
- .size fcvtas_ds_simd, .Lfunc_end70-fcvtas_ds_simd
- .cfi_endproc
- // -- End function
- .globl fcvtas_sd_simd // -- Begin function fcvtas_sd_simd
- .p2align 2
- .type fcvtas_sd_simd, at function
-fcvtas_sd_simd: // @fcvtas_sd_simd
- .cfi_startproc
-// %bb.0:
- fcvtas s0, d0
- ret
-.Lfunc_end71:
- .size fcvtas_sd_simd, .Lfunc_end71-fcvtas_sd_simd
- .cfi_endproc
- // -- End function
- .globl fcvtas_ss_simd // -- Begin function fcvtas_ss_simd
- .p2align 2
- .type fcvtas_ss_simd, at function
-fcvtas_ss_simd: // @fcvtas_ss_simd
- .cfi_startproc
-// %bb.0:
- fcvtas s0, s0
- ret
-.Lfunc_end72:
- .size fcvtas_ss_simd, .Lfunc_end72-fcvtas_ss_simd
- .cfi_endproc
- // -- End function
- .globl fcvtas_dd_simd // -- Begin function fcvtas_dd_simd
- .p2align 2
- .type fcvtas_dd_simd, at function
-fcvtas_dd_simd: // @fcvtas_dd_simd
- .cfi_startproc
-// %bb.0:
- fcvtas d0, d0
- ret
-.Lfunc_end73:
- .size fcvtas_dd_simd, .Lfunc_end73-fcvtas_dd_simd
- .cfi_endproc
- // -- End function
- .globl fcvtau_sh_simd // -- Begin function fcvtau_sh_simd
- .p2align 2
- .type fcvtau_sh_simd, at function
-fcvtau_sh_simd: // @fcvtau_sh_simd
- .cfi_startproc
-// %bb.0:
- fcvtau s0, h0
- ret
-.Lfunc_end74:
- .size fcvtau_sh_simd, .Lfunc_end74-fcvtau_sh_simd
- .cfi_endproc
- // -- End function
- .globl fcvtau_dh_simd // -- Begin function fcvtau_dh_simd
- .p2align 2
- .type fcvtau_dh_simd, at function
-fcvtau_dh_simd: // @fcvtau_dh_simd
- .cfi_startproc
-// %bb.0:
- fcvtau d0, h0
- ret
-.Lfunc_end75:
- .size fcvtau_dh_simd, .Lfunc_end75-fcvtau_dh_simd
- .cfi_endproc
- // -- End function
- .globl fcvtau_ds_simd // -- Begin function fcvtau_ds_simd
- .p2align 2
- .type fcvtau_ds_simd, at function
-fcvtau_ds_simd: // @fcvtau_ds_simd
- .cfi_startproc
-// %bb.0:
- fcvtau d0, s0
- ret
-.Lfunc_end76:
- .size fcvtau_ds_simd, .Lfunc_end76-fcvtau_ds_simd
- .cfi_endproc
- // -- End function
- .globl fcvtau_sd_simd // -- Begin function fcvtau_sd_simd
- .p2align 2
- .type fcvtau_sd_simd, at function
-fcvtau_sd_simd: // @fcvtau_sd_simd
- .cfi_startproc
-// %bb.0:
- fcvtau s0, d0
- ret
-.Lfunc_end77:
- .size fcvtau_sd_simd, .Lfunc_end77-fcvtau_sd_simd
- .cfi_endproc
- // -- End function
- .globl fcvtau_ss_simd // -- Begin function fcvtau_ss_simd
- .p2align 2
- .type fcvtau_ss_simd, at function
-fcvtau_ss_simd: // @fcvtau_ss_simd
- .cfi_startproc
-// %bb.0:
- fcvtas s0, s0
- ret
-.Lfunc_end78:
- .size fcvtau_ss_simd, .Lfunc_end78-fcvtau_ss_simd
- .cfi_endproc
- // -- End function
- .globl fcvtau_dd_simd // -- Begin function fcvtau_dd_simd
- .p2align 2
- .type fcvtau_dd_simd, at function
-fcvtau_dd_simd: // @fcvtau_dd_simd
- .cfi_startproc
-// %bb.0:
- fcvtas d0, d0
- ret
-.Lfunc_end79:
- .size fcvtau_dd_simd, .Lfunc_end79-fcvtau_dd_simd
- .cfi_endproc
- // -- End function
- .globl fcvtms_sh_simd // -- Begin function fcvtms_sh_simd
- .p2align 2
- .type fcvtms_sh_simd, at function
-fcvtms_sh_simd: // @fcvtms_sh_simd
- .cfi_startproc
-// %bb.0:
- fcvtms s0, h0
- ret
-.Lfunc_end80:
- .size fcvtms_sh_simd, .Lfunc_end80-fcvtms_sh_simd
- .cfi_endproc
- // -- End function
- .globl fcvtms_dh_simd // -- Begin function fcvtms_dh_simd
- .p2align 2
- .type fcvtms_dh_simd, at function
-fcvtms_dh_simd: // @fcvtms_dh_simd
- .cfi_startproc
-// %bb.0:
- fcvtms d0, h0
- ret
-.Lfunc_end81:
- .size fcvtms_dh_simd, .Lfunc_end81-fcvtms_dh_simd
- .cfi_endproc
- // -- End function
- .globl fcvtms_ds_simd // -- Begin function fcvtms_ds_simd
- .p2align 2
- .type fcvtms_ds_simd, at function
-fcvtms_ds_simd: // @fcvtms_ds_simd
- .cfi_startproc
-// %bb.0:
- fcvtms d0, s0
- ret
-.Lfunc_end82:
- .size fcvtms_ds_simd, .Lfunc_end82-fcvtms_ds_simd
- .cfi_endproc
- // -- End function
- .globl fcvtms_sd_simd // -- Begin function fcvtms_sd_simd
- .p2align 2
- .type fcvtms_sd_simd, at function
-fcvtms_sd_simd: // @fcvtms_sd_simd
- .cfi_startproc
-// %bb.0:
- fcvtms s0, d0
- ret
-.Lfunc_end83:
- .size fcvtms_sd_simd, .Lfunc_end83-fcvtms_sd_simd
- .cfi_endproc
- // -- End function
- .globl fcvtms_ss_simd // -- Begin function fcvtms_ss_simd
- .p2align 2
- .type fcvtms_ss_simd, at function
-fcvtms_ss_simd: // @fcvtms_ss_simd
- .cfi_startproc
-// %bb.0:
- fcvtms s0, s0
- ret
-.Lfunc_end84:
- .size fcvtms_ss_simd, .Lfunc_end84-fcvtms_ss_simd
- .cfi_endproc
- // -- End function
- .globl fcvtms_dd_simd // -- Begin function fcvtms_dd_simd
- .p2align 2
- .type fcvtms_dd_simd, at function
-fcvtms_dd_simd: // @fcvtms_dd_simd
- .cfi_startproc
-// %bb.0:
- fcvtms d0, d0
- ret
-.Lfunc_end85:
- .size fcvtms_dd_simd, .Lfunc_end85-fcvtms_dd_simd
- .cfi_endproc
- // -- End function
- .globl fcvtmu_sh_simd // -- Begin function fcvtmu_sh_simd
- .p2align 2
- .type fcvtmu_sh_simd, at function
-fcvtmu_sh_simd: // @fcvtmu_sh_simd
- .cfi_startproc
-// %bb.0:
- fcvtmu s0, h0
- ret
-.Lfunc_end86:
- .size fcvtmu_sh_simd, .Lfunc_end86-fcvtmu_sh_simd
- .cfi_endproc
- // -- End function
- .globl fcvtmu_dh_simd // -- Begin function fcvtmu_dh_simd
- .p2align 2
- .type fcvtmu_dh_simd, at function
-fcvtmu_dh_simd: // @fcvtmu_dh_simd
- .cfi_startproc
-// %bb.0:
- fcvtmu d0, h0
- ret
-.Lfunc_end87:
- .size fcvtmu_dh_simd, .Lfunc_end87-fcvtmu_dh_simd
- .cfi_endproc
- // -- End function
- .globl fcvtmu_ds_simd // -- Begin function fcvtmu_ds_simd
- .p2align 2
- .type fcvtmu_ds_simd, at function
-fcvtmu_ds_simd: // @fcvtmu_ds_simd
- .cfi_startproc
-// %bb.0:
- fcvtmu d0, s0
- ret
-.Lfunc_end88:
- .size fcvtmu_ds_simd, .Lfunc_end88-fcvtmu_ds_simd
- .cfi_endproc
- // -- End function
- .globl fcvtmu_sd_simd // -- Begin function fcvtmu_sd_simd
- .p2align 2
- .type fcvtmu_sd_simd, at function
-fcvtmu_sd_simd: // @fcvtmu_sd_simd
- .cfi_startproc
-// %bb.0:
- fcvtmu s0, d0
- ret
-.Lfunc_end89:
- .size fcvtmu_sd_simd, .Lfunc_end89-fcvtmu_sd_simd
- .cfi_endproc
- // -- End function
- .globl fcvtmu_ss_simd // -- Begin function fcvtmu_ss_simd
- .p2align 2
- .type fcvtmu_ss_simd, at function
-fcvtmu_ss_simd: // @fcvtmu_ss_simd
- .cfi_startproc
-// %bb.0:
- fcvtms s0, s0
- ret
-.Lfunc_end90:
- .size fcvtmu_ss_simd, .Lfunc_end90-fcvtmu_ss_simd
- .cfi_endproc
- // -- End function
- .globl fcvtmu_dd_simd // -- Begin function fcvtmu_dd_simd
- .p2align 2
- .type fcvtmu_dd_simd, at function
-fcvtmu_dd_simd: // @fcvtmu_dd_simd
- .cfi_startproc
-// %bb.0:
- fcvtms d0, d0
- ret
-.Lfunc_end91:
- .size fcvtmu_dd_simd, .Lfunc_end91-fcvtmu_dd_simd
- .cfi_endproc
- // -- End function
- .globl fcvtps_sh_simd // -- Begin function fcvtps_sh_simd
- .p2align 2
- .type fcvtps_sh_simd, at function
-fcvtps_sh_simd: // @fcvtps_sh_simd
- .cfi_startproc
-// %bb.0:
- fcvtps s0, h0
- ret
-.Lfunc_end92:
- .size fcvtps_sh_simd, .Lfunc_end92-fcvtps_sh_simd
- .cfi_endproc
- // -- End function
- .globl fcvtps_dh_simd // -- Begin function fcvtps_dh_simd
- .p2align 2
- .type fcvtps_dh_simd, at function
-fcvtps_dh_simd: // @fcvtps_dh_simd
- .cfi_startproc
-// %bb.0:
- fcvtps d0, h0
- ret
-.Lfunc_end93:
- .size fcvtps_dh_simd, .Lfunc_end93-fcvtps_dh_simd
- .cfi_endproc
- // -- End function
- .globl fcvtps_ds_simd // -- Begin function fcvtps_ds_simd
- .p2align 2
- .type fcvtps_ds_simd, at function
-fcvtps_ds_simd: // @fcvtps_ds_simd
- .cfi_startproc
-// %bb.0:
- fcvtps d0, s0
- ret
-.Lfunc_end94:
- .size fcvtps_ds_simd, .Lfunc_end94-fcvtps_ds_simd
- .cfi_endproc
- // -- End function
- .globl fcvtps_sd_simd // -- Begin function fcvtps_sd_simd
- .p2align 2
- .type fcvtps_sd_simd, at function
-fcvtps_sd_simd: // @fcvtps_sd_simd
- .cfi_startproc
-// %bb.0:
- fcvtps s0, d0
- ret
-.Lfunc_end95:
- .size fcvtps_sd_simd, .Lfunc_end95-fcvtps_sd_simd
- .cfi_endproc
- // -- End function
- .globl fcvtps_ss_simd // -- Begin function fcvtps_ss_simd
- .p2align 2
- .type fcvtps_ss_simd, at function
-fcvtps_ss_simd: // @fcvtps_ss_simd
- .cfi_startproc
-// %bb.0:
- fcvtps s0, s0
- ret
-.Lfunc_end96:
- .size fcvtps_ss_simd, .Lfunc_end96-fcvtps_ss_simd
- .cfi_endproc
- // -- End function
- .globl fcvtps_dd_simd // -- Begin function fcvtps_dd_simd
- .p2align 2
- .type fcvtps_dd_simd, at function
-fcvtps_dd_simd: // @fcvtps_dd_simd
- .cfi_startproc
-// %bb.0:
- fcvtps d0, d0
- ret
-.Lfunc_end97:
- .size fcvtps_dd_simd, .Lfunc_end97-fcvtps_dd_simd
- .cfi_endproc
- // -- End function
- .globl fcvtpu_sh_simd // -- Begin function fcvtpu_sh_simd
- .p2align 2
- .type fcvtpu_sh_simd, at function
-fcvtpu_sh_simd: // @fcvtpu_sh_simd
- .cfi_startproc
-// %bb.0:
- fcvtpu s0, h0
- ret
-.Lfunc_end98:
- .size fcvtpu_sh_simd, .Lfunc_end98-fcvtpu_sh_simd
- .cfi_endproc
- // -- End function
- .globl fcvtpu_dh_simd // -- Begin function fcvtpu_dh_simd
- .p2align 2
- .type fcvtpu_dh_simd, at function
-fcvtpu_dh_simd: // @fcvtpu_dh_simd
- .cfi_startproc
-// %bb.0:
- fcvtpu d0, h0
- ret
-.Lfunc_end99:
- .size fcvtpu_dh_simd, .Lfunc_end99-fcvtpu_dh_simd
- .cfi_endproc
- // -- End function
- .globl fcvtpu_ds_simd // -- Begin function fcvtpu_ds_simd
- .p2align 2
- .type fcvtpu_ds_simd, at function
-fcvtpu_ds_simd: // @fcvtpu_ds_simd
- .cfi_startproc
-// %bb.0:
- fcvtpu d0, s0
- ret
-.Lfunc_end100:
- .size fcvtpu_ds_simd, .Lfunc_end100-fcvtpu_ds_simd
- .cfi_endproc
- // -- End function
- .globl fcvtpu_sd_simd // -- Begin function fcvtpu_sd_simd
- .p2align 2
- .type fcvtpu_sd_simd, at function
-fcvtpu_sd_simd: // @fcvtpu_sd_simd
- .cfi_startproc
-// %bb.0:
- fcvtpu s0, d0
- ret
-.Lfunc_end101:
- .size fcvtpu_sd_simd, .Lfunc_end101-fcvtpu_sd_simd
- .cfi_endproc
- // -- End function
- .globl fcvtpu_ss_simd // -- Begin function fcvtpu_ss_simd
- .p2align 2
- .type fcvtpu_ss_simd, at function
-fcvtpu_ss_simd: // @fcvtpu_ss_simd
- .cfi_startproc
-// %bb.0:
- fcvtps s0, s0
- ret
-.Lfunc_end102:
- .size fcvtpu_ss_simd, .Lfunc_end102-fcvtpu_ss_simd
- .cfi_endproc
- // -- End function
- .globl fcvtpu_dd_simd // -- Begin function fcvtpu_dd_simd
- .p2align 2
- .type fcvtpu_dd_simd, at function
-fcvtpu_dd_simd: // @fcvtpu_dd_simd
- .cfi_startproc
-// %bb.0:
- fcvtps d0, d0
- ret
-.Lfunc_end103:
- .size fcvtpu_dd_simd, .Lfunc_end103-fcvtpu_dd_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzs_sh_simd // -- Begin function fcvtzs_sh_simd
- .p2align 2
- .type fcvtzs_sh_simd, at function
-fcvtzs_sh_simd: // @fcvtzs_sh_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs s0, h0
- ret
-.Lfunc_end104:
- .size fcvtzs_sh_simd, .Lfunc_end104-fcvtzs_sh_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzs_dh_simd // -- Begin function fcvtzs_dh_simd
- .p2align 2
- .type fcvtzs_dh_simd, at function
-fcvtzs_dh_simd: // @fcvtzs_dh_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, h0
- ret
-.Lfunc_end105:
- .size fcvtzs_dh_simd, .Lfunc_end105-fcvtzs_dh_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzs_ds_simd // -- Begin function fcvtzs_ds_simd
- .p2align 2
- .type fcvtzs_ds_simd, at function
-fcvtzs_ds_simd: // @fcvtzs_ds_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, s0
- ret
-.Lfunc_end106:
- .size fcvtzs_ds_simd, .Lfunc_end106-fcvtzs_ds_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzs_sd_simd // -- Begin function fcvtzs_sd_simd
- .p2align 2
- .type fcvtzs_sd_simd, at function
-fcvtzs_sd_simd: // @fcvtzs_sd_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs s0, d0
- ret
-.Lfunc_end107:
- .size fcvtzs_sd_simd, .Lfunc_end107-fcvtzs_sd_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzs_ss_simd // -- Begin function fcvtzs_ss_simd
- .p2align 2
- .type fcvtzs_ss_simd, at function
-fcvtzs_ss_simd: // @fcvtzs_ss_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs s0, s0
- ret
-.Lfunc_end108:
- .size fcvtzs_ss_simd, .Lfunc_end108-fcvtzs_ss_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzs_dd_simd // -- Begin function fcvtzs_dd_simd
- .p2align 2
- .type fcvtzs_dd_simd, at function
-fcvtzs_dd_simd: // @fcvtzs_dd_simd
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, d0
- ret
-.Lfunc_end109:
- .size fcvtzs_dd_simd, .Lfunc_end109-fcvtzs_dd_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzu_sh_simd // -- Begin function fcvtzu_sh_simd
- .p2align 2
- .type fcvtzu_sh_simd, at function
-fcvtzu_sh_simd: // @fcvtzu_sh_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu s0, h0
- ret
-.Lfunc_end110:
- .size fcvtzu_sh_simd, .Lfunc_end110-fcvtzu_sh_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzu_dh_simd // -- Begin function fcvtzu_dh_simd
- .p2align 2
- .type fcvtzu_dh_simd, at function
-fcvtzu_dh_simd: // @fcvtzu_dh_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu d0, h0
- ret
-.Lfunc_end111:
- .size fcvtzu_dh_simd, .Lfunc_end111-fcvtzu_dh_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzu_ds_simd // -- Begin function fcvtzu_ds_simd
- .p2align 2
- .type fcvtzu_ds_simd, at function
-fcvtzu_ds_simd: // @fcvtzu_ds_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu d0, s0
- ret
-.Lfunc_end112:
- .size fcvtzu_ds_simd, .Lfunc_end112-fcvtzu_ds_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzu_sd_simd // -- Begin function fcvtzu_sd_simd
- .p2align 2
- .type fcvtzu_sd_simd, at function
-fcvtzu_sd_simd: // @fcvtzu_sd_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu s0, d0
- ret
-.Lfunc_end113:
- .size fcvtzu_sd_simd, .Lfunc_end113-fcvtzu_sd_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzu_ss_simd // -- Begin function fcvtzu_ss_simd
- .p2align 2
- .type fcvtzu_ss_simd, at function
-fcvtzu_ss_simd: // @fcvtzu_ss_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu s0, s0
- ret
-.Lfunc_end114:
- .size fcvtzu_ss_simd, .Lfunc_end114-fcvtzu_ss_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzu_dd_simd // -- Begin function fcvtzu_dd_simd
- .p2align 2
- .type fcvtzu_dd_simd, at function
-fcvtzu_dd_simd: // @fcvtzu_dd_simd
- .cfi_startproc
-// %bb.0:
- fcvtzu d0, d0
- ret
-.Lfunc_end115:
- .size fcvtzu_dd_simd, .Lfunc_end115-fcvtzu_dd_simd
- .cfi_endproc
- // -- End function
- .globl fcvtzs_scalar_to_vector_h // -- Begin function fcvtzs_scalar_to_vector_h
- .p2align 2
- .type fcvtzs_scalar_to_vector_h, at function
-fcvtzs_scalar_to_vector_h: // @fcvtzs_scalar_to_vector_h
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, h0
- ret
-.Lfunc_end116:
- .size fcvtzs_scalar_to_vector_h, .Lfunc_end116-fcvtzs_scalar_to_vector_h
- .cfi_endproc
- // -- End function
- .globl fcvtzs_scalar_to_vector_s // -- Begin function fcvtzs_scalar_to_vector_s
- .p2align 2
- .type fcvtzs_scalar_to_vector_s, at function
-fcvtzs_scalar_to_vector_s: // @fcvtzs_scalar_to_vector_s
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, s0
- ret
-.Lfunc_end117:
- .size fcvtzs_scalar_to_vector_s, .Lfunc_end117-fcvtzs_scalar_to_vector_s
- .cfi_endproc
- // -- End function
- .globl fcvtzs_scalar_to_vector_d // -- Begin function fcvtzs_scalar_to_vector_d
- .p2align 2
- .type fcvtzs_scalar_to_vector_d, at function
-fcvtzs_scalar_to_vector_d: // @fcvtzs_scalar_to_vector_d
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, d0
- ret
-.Lfunc_end118:
- .size fcvtzs_scalar_to_vector_d, .Lfunc_end118-fcvtzs_scalar_to_vector_d
- .cfi_endproc
- // -- End function
- .globl fcvtzu_scalar_to_vector_h // -- Begin function fcvtzu_scalar_to_vector_h
- .p2align 2
- .type fcvtzu_scalar_to_vector_h, at function
-fcvtzu_scalar_to_vector_h: // @fcvtzu_scalar_to_vector_h
- .cfi_startproc
-// %bb.0:
- fcvtzu d0, h0
- ret
-.Lfunc_end119:
- .size fcvtzu_scalar_to_vector_h, .Lfunc_end119-fcvtzu_scalar_to_vector_h
- .cfi_endproc
- // -- End function
- .globl fcvtzu_scalar_to_vector_s // -- Begin function fcvtzu_scalar_to_vector_s
- .p2align 2
- .type fcvtzu_scalar_to_vector_s, at function
-fcvtzu_scalar_to_vector_s: // @fcvtzu_scalar_to_vector_s
- .cfi_startproc
-// %bb.0:
- fcvtzu d0, s0
- ret
-.Lfunc_end120:
- .size fcvtzu_scalar_to_vector_s, .Lfunc_end120-fcvtzu_scalar_to_vector_s
- .cfi_endproc
- // -- End function
- .globl fcvtzu_scalar_to_vector_d // -- Begin function fcvtzu_scalar_to_vector_d
- .p2align 2
- .type fcvtzu_scalar_to_vector_d, at function
-fcvtzu_scalar_to_vector_d: // @fcvtzu_scalar_to_vector_d
- .cfi_startproc
-// %bb.0:
- fcvtzu d0, d0
- ret
-.Lfunc_end121:
- .size fcvtzu_scalar_to_vector_d, .Lfunc_end121-fcvtzu_scalar_to_vector_d
- .cfi_endproc
- // -- End function
- .globl fcvtzs_scalar_to_vector_h_strict // -- Begin function fcvtzs_scalar_to_vector_h_strict
- .p2align 2
- .type fcvtzs_scalar_to_vector_h_strict, at function
-fcvtzs_scalar_to_vector_h_strict: // @fcvtzs_scalar_to_vector_h_strict
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, h0
- ret
-.Lfunc_end122:
- .size fcvtzs_scalar_to_vector_h_strict, .Lfunc_end122-fcvtzs_scalar_to_vector_h_strict
- .cfi_endproc
- // -- End function
- .globl fcvtzs_scalar_to_vector_s_strict // -- Begin function fcvtzs_scalar_to_vector_s_strict
- .p2align 2
- .type fcvtzs_scalar_to_vector_s_strict, at function
-fcvtzs_scalar_to_vector_s_strict: // @fcvtzs_scalar_to_vector_s_strict
- .cfi_startproc
-// %bb.0:
- fcvtzs d0, s0
- ret
-.Lfunc_end123:
- .size fcvtzs_scalar_to_vector_s_strict, .Lfunc_end123-fcvtzs_scalar_to_vector_s_strict
- .cfi_endproc
- // -- End function
- .globl fcvtzu_scalar_to_vector_h_strict // -- Begin function fcvtzu_scalar_to_vector_h_strict
- .p2align 2
- .type fcvtzu_scalar_to_vector_h_strict, at function
-fcvtzu_scalar_to_vector_h_strict: // @fcvtzu_scalar_to_vector_h_strict
- .cfi_startproc
-// %bb.0:
- fcvtzu d0, h0
- ret
-.Lfunc_end124:
- .size fcvtzu_scalar_to_vector_h_strict, .Lfunc_end124-fcvtzu_scalar_to_vector_h_strict
- .cfi_endproc
- // -- End function
- .globl fcvtzu_scalar_to_vector_s_strict // -- Begin function fcvtzu_scalar_to_vector_s_strict
- .p2align 2
- .type fcvtzu_scalar_to_vector_s_strict, at function
-fcvtzu_scalar_to_vector_s_strict: // @fcvtzu_scalar_to_vector_s_strict
- .cfi_startproc
-// %bb.0:
- fcvtzu d0, s0
- ret
-.Lfunc_end125:
- .size fcvtzu_scalar_to_vector_s_strict, .Lfunc_end125-fcvtzu_scalar_to_vector_s_strict
- .cfi_endproc
- // -- End function
- .section ".note.GNU-stack","", at progbits
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
index 8b8f23a049107..68c24f2a30709 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
@@ -4,7 +4,7 @@
;
-; Intriniscs (bitcast)
+; Intrinsics (bitcast)
;
define float @fcvtas_1s1d_simd(double %A) nounwind {
>From da8e86fb8c9b2c1cfbcbf65fa332564807d1c2fc Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Thu, 15 Jan 2026 14:42:57 +0000
Subject: [PATCH 3/8] Add missing patterns, address review comments
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 66 +-
.../AArch64/arm64-cvt-simd-fptoi-strictfp.ll | 575 ++++++++
.../CodeGen/AArch64/arm64-cvt-simd-fptoi.ll | 567 ++++----
.../AArch64/arm64-cvt-simd-intrinsics.ll | 1172 ++++++++++++++---
llvm/test/CodeGen/AArch64/arm64-vcvt.ll | 1 -
5 files changed, 1940 insertions(+), 441 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi-strictfp.ll
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 447fd9ef66343..6a0fe9b4619c6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6563,19 +6563,44 @@ multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
(!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
def : Pat<(f64 (bitconvert (i64 (OpN (f32 FPR32:$Rn))))),
(!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
+ }
+ def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))),
+ (!cast<Instruction>(INST # v1i32) FPR32:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))),
+ (!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
+ foreach ret_type = [v2i32, v4i32] in {
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(ret_type (scalar_to_vector (i32 (OpN (f64 FPR64:$Rn))))),
+ (INSERT_SUBREG (IMPLICIT_DEF),
+ (!cast<Instruction>(INST # SDr) FPR64:$Rn), ssub)>;
+ def : Pat<(ret_type (scalar_to_vector (i32 (OpN (f16 FPR16:$Rn))))),
+ (INSERT_SUBREG (IMPLICIT_DEF),
+ (!cast<Instruction>(INST # SHr) FPR16:$Rn), ssub)>;
+ }
+ def : Pat<(ret_type (scalar_to_vector (i32 (OpN (f32 FPR32:$Rn))))),
+ (INSERT_SUBREG (IMPLICIT_DEF),
+ (!cast<Instruction>(INST # v1i32) FPR32:$Rn), ssub)>;
+ }
+
+ let Predicates = [HasFPRCVT] in {
def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f16 FPR16:$Rn))))),
(!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f32 FPR32:$Rn))))),
(!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
+ def : Pat<(v2i64 (scalar_to_vector (i64 (OpN (f16 FPR16:$Rn))))),
+ (INSERT_SUBREG (IMPLICIT_DEF),
+ (!cast<Instruction>(INST # DHr) FPR16:$Rn), dsub)>;
+ def : Pat<(v2i64 (scalar_to_vector (i64 (OpN (f32 FPR32:$Rn))))),
+ (INSERT_SUBREG (IMPLICIT_DEF),
+ (!cast<Instruction>(INST # DSr) FPR32:$Rn), dsub)>;
}
- def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))),
- (!cast<Instruction>(INST # v1i32) FPR32:$Rn)>;
- def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))),
- (!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f64 FPR64:$Rn))))),
(!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
+ def : Pat<(v2i64 (scalar_to_vector (i64 (OpN (f64 FPR64:$Rn))))),
+ (INSERT_SUBREG (IMPLICIT_DEF),
+ (!cast<Instruction>(INST # v1i64) FPR64:$Rn), dsub)>;
}
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtas, "FCVTAS">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtau, "FCVTAU">;
@@ -6618,19 +6643,44 @@ multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
(!cast<Instruction>(INST # DSr) $Rn)>;
def : Pat<(f32 (bitconvert (i32 (round f64:$Rn)))),
(!cast<Instruction>(INST # SDr) $Rn)>;
+ }
+ def : Pat<(f32 (bitconvert (i32 (round f32:$Rn)))),
+ (!cast<Instruction>(INST # v1i32) $Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (round f64:$Rn)))),
+ (!cast<Instruction>(INST # v1i64) $Rn)>;
+ foreach ret_type = [v2i32, v4i32] in {
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(ret_type (scalar_to_vector (i32 (round f16:$Rn)))),
+ (INSERT_SUBREG (IMPLICIT_DEF),
+ (!cast<Instruction>(INST # SHr) $Rn), ssub)>;
+ def : Pat<(ret_type (scalar_to_vector (i32 (round f64:$Rn)))),
+ (INSERT_SUBREG (IMPLICIT_DEF),
+ (!cast<Instruction>(INST # SDr) $Rn), ssub)>;
+ }
+ def : Pat<(ret_type (scalar_to_vector (i32 (round f32:$Rn)))),
+ (INSERT_SUBREG (IMPLICIT_DEF),
+ (!cast<Instruction>(INST # v1i32) $Rn), ssub)>;
+ }
+
+ let Predicates = [HasFPRCVT] in {
def : Pat<(v1i64 (scalar_to_vector (i64 (round f16:$Rn)))),
(!cast<Instruction>(INST # DHr) $Rn)>;
def : Pat<(v1i64 (scalar_to_vector (i64 (round f32:$Rn)))),
(!cast<Instruction>(INST # DSr) $Rn)>;
+ def : Pat<(v2i64 (scalar_to_vector (i64 (round f16:$Rn)))),
+ (INSERT_SUBREG (IMPLICIT_DEF),
+ (!cast<Instruction>(INST # DHr) $Rn), dsub)>;
+ def : Pat<(v2i64 (scalar_to_vector (i64 (round f32:$Rn)))),
+ (INSERT_SUBREG (IMPLICIT_DEF),
+ (!cast<Instruction>(INST # DSr) $Rn), dsub)>;
}
- def : Pat<(f32 (bitconvert (i32 (round f32:$Rn)))),
- (!cast<Instruction>(INST # v1i32) $Rn)>;
- def : Pat<(f64 (bitconvert (i64 (round f64:$Rn)))),
- (!cast<Instruction>(INST # v1i64) $Rn)>;
def : Pat<(v1i64 (scalar_to_vector (i64 (round f64:$Rn)))),
(!cast<Instruction>(INST # v1i64) $Rn)>;
+ def : Pat<(v2i64 (scalar_to_vector (i64 (round f64:$Rn)))),
+ (INSERT_SUBREG (IMPLICIT_DEF),
+ (!cast<Instruction>(INST # v1i64) $Rn), dsub)>;
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi-strictfp.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi-strictfp.ll
new file mode 100644
index 0000000000000..1afe981ea816c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi-strictfp.ll
@@ -0,0 +1,575 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-NOFPRCVT
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK
+
+;
+; FPTOI strictfp
+;
+
+define float @fptosi_i32_f16_simd(half %x) {
+; CHECK-NOFPRCVT-LABEL: fptosi_i32_f16_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs w8, h0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fptosi_i32_f16_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, h0
+; CHECK-NEXT: ret
+ %val = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict")
+ %sum = bitcast i32 %val to float
+ ret float %sum
+}
+
+define double @fptosi_i64_f16_simd(half %x) {
+; CHECK-NOFPRCVT-LABEL: fptosi_i64_f16_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs x8, h0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fptosi_i64_f16_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, h0
+; CHECK-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict")
+ %sum = bitcast i64 %val to double
+ ret double %sum
+}
+
+define double @fptosi_i64_f32_simd(float %x) {
+; CHECK-NOFPRCVT-LABEL: fptosi_i64_f32_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fptosi_i64_f32_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict")
+ %bc = bitcast i64 %val to double
+ ret double %bc
+}
+
+define float @fptosi_i32_f64_simd(double %x) {
+; CHECK-NOFPRCVT-LABEL: fptosi_i32_f64_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs w8, d0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fptosi_i32_f64_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, d0
+; CHECK-NEXT: ret
+ %val = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict")
+ %bc = bitcast i32 %val to float
+ ret float %bc
+}
+
+define double @fptosi_i64_f64_simd(double %x) {
+; CHECK-NOFPRCVT-LABEL: fptosi_i64_f64_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fptosi_i64_f64_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict")
+ %bc = bitcast i64 %val to double
+ ret double %bc
+}
+
+define float @fptosi_i32_f32_simd(float %x) {
+; CHECK-NOFPRCVT-LABEL: fptosi_i32_f32_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs s0, s0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fptosi_i32_f32_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: ret
+ %val = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict")
+ %bc = bitcast i32 %val to float
+ ret float %bc
+}
+
+define float @fptoui_i32_f16_simd(half %x) {
+; CHECK-NOFPRCVT-LABEL: fptoui_i32_f16_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu w8, h0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fptoui_i32_f16_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, h0
+; CHECK-NEXT: ret
+ %val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict")
+ %sum = bitcast i32 %val to float
+ ret float %sum
+}
+
+define double @fptoui_i64_f16_simd(half %x) {
+; CHECK-NOFPRCVT-LABEL: fptoui_i64_f16_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu x8, h0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fptoui_i64_f16_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, h0
+; CHECK-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict")
+ %sum = bitcast i64 %val to double
+ ret double %sum
+}
+
+define double @fptoui_i64_f32_simd(float %x) {
+; CHECK-NOFPRCVT-LABEL: fptoui_i64_f32_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fptoui_i64_f32_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, s0
+; CHECK-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict")
+ %bc = bitcast i64 %val to double
+ ret double %bc
+}
+
+define float @fptoui_i32_f64_simd(double %x) {
+; CHECK-NOFPRCVT-LABEL: fptoui_i32_f64_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu w8, d0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fptoui_i32_f64_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, d0
+; CHECK-NEXT: ret
+ %val = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict")
+ %bc = bitcast i32 %val to float
+ ret float %bc
+}
+
+define double @fptoui_i64_f64_simd(double %x) {
+; CHECK-NOFPRCVT-LABEL: fptoui_i64_f64_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu d0, d0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fptoui_i64_f64_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, d0
+; CHECK-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict")
+ %bc = bitcast i64 %val to double
+ ret double %bc
+}
+
+define float @fptoui_i32_f32_simd(float %x) {
+; CHECK-NOFPRCVT-LABEL: fptoui_i32_f32_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu s0, s0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fptoui_i32_f32_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, s0
+; CHECK-NEXT: ret
+ %val = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict")
+ %bc = bitcast i32 %val to float
+ ret float %bc
+}
+
+;
+; FPTOI scalar_to_vector strictfp
+;
+
+define <2 x i32> @fcvtzs_v2i32_from_f16_scalar_to_vector_simd_strict(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i32_from_f16_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs w8, h0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_v2i32_from_f16_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, h0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict")
+ %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
+ ret <2 x i32> %fcvtzs_vector
+}
+
+define <2 x i32> @fcvtzs_v2i32_from_f32_scalar_to_vector_simd_strict(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i32_from_f32_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs s0, s0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_v2i32_from_f32_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %a, metadata !"fpexcept.strict")
+ %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
+ ret <2 x i32> %fcvtzs_vector
+}
+
+define <2 x i32> @fcvtzs_v2i32_from_f64_scalar_to_vector_simd_strict(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i32_from_f64_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs w8, d0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_v2i32_from_f64_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, d0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict")
+ %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
+ ret <2 x i32> %fcvtzs_vector
+}
+
+define <4 x i32> @fcvtzs_v4i32_from_f16_scalar_to_vector_simd_strict(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v4i32_from_f16_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs w8, h0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_v4i32_from_f16_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, h0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict")
+ %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
+ ret <4 x i32> %fcvtzs_vector
+}
+
+define <4 x i32> @fcvtzs_v4i32_from_f32_scalar_to_vector_simd_strict(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v4i32_from_f32_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs s0, s0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_v4i32_from_f32_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %a, metadata !"fpexcept.strict")
+ %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
+ ret <4 x i32> %fcvtzs_vector
+}
+
+define <4 x i32> @fcvtzs_v4i32_from_f64_scalar_to_vector_simd_strict(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v4i32_from_f64_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs w8, d0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_v4i32_from_f64_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, d0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict")
+ %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
+ ret <4 x i32> %fcvtzs_vector
+}
+
+define <1 x i64> @fcvtzs_v1i64_from_f16_scalar_to_vector_simd_strict(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v1i64_from_f16_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs x8, h0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_v1i64_from_f16_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, h0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %a, metadata !"fpexcept.strict")
+ %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
+ ret <1 x i64> %fcvtzs_vector
+}
+
+define <1 x i64> @fcvtzs_v1i64_from_f32_scalar_to_vector_simd_strict(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v1i64_from_f32_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_v1i64_from_f32_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %a, metadata !"fpexcept.strict")
+ %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
+ ret <1 x i64> %fcvtzs_vector
+}
+
+define <1 x i64> @fcvtzs_v1i64_from_f64_scalar_to_vector_simd_strict(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v1i64_from_f64_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_v1i64_from_f64_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %a, metadata !"fpexcept.strict")
+ %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
+ ret <1 x i64> %fcvtzs_vector
+}
+
+define <2 x i64> @fcvtzs_v2i64_from_f16_scalar_to_vector_simd_strict(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i64_from_f16_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs x8, h0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_v2i64_from_f16_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, h0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %a, metadata !"fpexcept.strict")
+ %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
+ ret <2 x i64> %fcvtzs_vector
+}
+
+define <2 x i64> @fcvtzs_v2i64_from_f32_scalar_to_vector_simd_strict(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i64_from_f32_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_v2i64_from_f32_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %a, metadata !"fpexcept.strict")
+ %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
+ ret <2 x i64> %fcvtzs_vector
+}
+
+define <2 x i64> @fcvtzs_v2i64_from_f64_scalar_to_vector_simd_strict(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i64_from_f64_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_v2i64_from_f64_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %a, metadata !"fpexcept.strict")
+ %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
+ ret <2 x i64> %fcvtzs_vector
+}
+
+define <2 x i32> @fcvtzu_v2i32_from_f16_scalar_to_vector_simd_strict(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i32_from_f16_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu w8, h0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_v2i32_from_f16_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, h0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict")
+ %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
+ ret <2 x i32> %fcvtzu_vector
+}
+
+define <2 x i32> @fcvtzu_v2i32_from_f32_scalar_to_vector_simd_strict(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i32_from_f32_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu s0, s0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_v2i32_from_f32_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, s0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %a, metadata !"fpexcept.strict")
+ %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
+ ret <2 x i32> %fcvtzu_vector
+}
+
+define <2 x i32> @fcvtzu_v2i32_from_f64_scalar_to_vector_simd_strict(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i32_from_f64_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu w8, d0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_v2i32_from_f64_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, d0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict")
+ %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
+ ret <2 x i32> %fcvtzu_vector
+}
+
+define <4 x i32> @fcvtzu_v4i32_from_f16_scalar_to_vector_simd_strict(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v4i32_from_f16_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu w8, h0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_v4i32_from_f16_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, h0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict")
+ %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
+ ret <4 x i32> %fcvtzu_vector
+}
+
+define <4 x i32> @fcvtzu_v4i32_from_f32_scalar_to_vector_simd_strict(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v4i32_from_f32_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu s0, s0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_v4i32_from_f32_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, s0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %a, metadata !"fpexcept.strict")
+ %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
+ ret <4 x i32> %fcvtzu_vector
+}
+
+define <4 x i32> @fcvtzu_v4i32_from_f64_scalar_to_vector_simd_strict(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v4i32_from_f64_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu w8, d0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_v4i32_from_f64_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, d0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict")
+ %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
+ ret <4 x i32> %fcvtzu_vector
+}
+
+define <1 x i64> @fcvtzu_v1i64_from_f16_scalar_to_vector_simd_strict(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v1i64_from_f16_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu x8, h0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_v1i64_from_f16_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, h0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %a, metadata !"fpexcept.strict")
+ %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
+ ret <1 x i64> %fcvtzu_vector
+}
+
+define <1 x i64> @fcvtzu_v1i64_from_f32_scalar_to_vector_simd_strict(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v1i64_from_f32_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_v1i64_from_f32_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, s0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %a, metadata !"fpexcept.strict")
+ %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
+ ret <1 x i64> %fcvtzu_vector
+}
+
+define <1 x i64> @fcvtzu_v1i64_from_f64_scalar_to_vector_simd_strict(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v1i64_from_f64_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu d0, d0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_v1i64_from_f64_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, d0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %a, metadata !"fpexcept.strict")
+ %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
+ ret <1 x i64> %fcvtzu_vector
+}
+
+define <2 x i64> @fcvtzu_v2i64_from_f16_scalar_to_vector_simd_strict(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i64_from_f16_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu x8, h0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_v2i64_from_f16_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, h0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %a, metadata !"fpexcept.strict")
+ %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
+ ret <2 x i64> %fcvtzu_vector
+}
+
+define <2 x i64> @fcvtzu_v2i64_from_f32_scalar_to_vector_simd_strict(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i64_from_f32_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_v2i64_from_f32_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, s0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %a, metadata !"fpexcept.strict")
+ %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
+ ret <2 x i64> %fcvtzu_vector
+}
+
+define <2 x i64> @fcvtzu_v2i64_from_f64_scalar_to_vector_simd_strict(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i64_from_f64_scalar_to_vector_simd_strict:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu d0, d0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_v2i64_from_f64_scalar_to_vector_simd_strict:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, d0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %a, metadata !"fpexcept.strict")
+ %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
+ ret <2 x i64> %fcvtzu_vector
+}
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
index ebaca00d2cdb9..52c35ce872b61 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
@@ -3,23 +3,6 @@
; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK
; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK
-; CHECK-GI: warning: Instruction selection used fallback path for fptosi_i32_f16_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f16_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f32_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f64_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f64_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f32_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f16_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f16_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f32_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f64_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f64_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f32_simd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzs_scalar_to_vector_h_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzs_scalar_to_vector_s_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzu_scalar_to_vector_h_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzu_scalar_to_vector_s_strict
-
;
; FPTOI
;
@@ -215,200 +198,6 @@ define float @test_fptoui_f32_i32_simd(float %a) {
}
-;
-; FPTOI strictfp
-;
-
-define float @fptosi_i32_f16_simd(half %x) {
-; CHECK-NOFPRCVT-LABEL: fptosi_i32_f16_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs w8, h0
-; CHECK-NOFPRCVT-NEXT: fmov s0, w8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fptosi_i32_f16_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs s0, h0
-; CHECK-NEXT: ret
- %val = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict")
- %sum = bitcast i32 %val to float
- ret float %sum
-}
-
-define double @fptosi_i64_f16_simd(half %x) {
-; CHECK-NOFPRCVT-LABEL: fptosi_i64_f16_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs x8, h0
-; CHECK-NOFPRCVT-NEXT: fmov d0, x8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fptosi_i64_f16_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs d0, h0
-; CHECK-NEXT: ret
- %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict")
- %sum = bitcast i64 %val to double
- ret double %sum
-}
-
-define double @fptosi_i64_f32_simd(float %x) {
-; CHECK-NOFPRCVT-LABEL: fptosi_i64_f32_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0
-; CHECK-NOFPRCVT-NEXT: fmov d0, x8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fptosi_i64_f32_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs d0, s0
-; CHECK-NEXT: ret
- %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict")
- %bc = bitcast i64 %val to double
- ret double %bc
-}
-
-define float @fptosi_i32_f64_simd(double %x) {
-; CHECK-NOFPRCVT-LABEL: fptosi_i32_f64_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs w8, d0
-; CHECK-NOFPRCVT-NEXT: fmov s0, w8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fptosi_i32_f64_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs s0, d0
-; CHECK-NEXT: ret
- %val = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict")
- %bc = bitcast i32 %val to float
- ret float %bc
-}
-
-define double @fptosi_i64_f64_simd(double %x) {
-; CHECK-NOFPRCVT-LABEL: fptosi_i64_f64_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fptosi_i64_f64_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs d0, d0
-; CHECK-NEXT: ret
- %val = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict")
- %bc = bitcast i64 %val to double
- ret double %bc
-}
-
-define float @fptosi_i32_f32_simd(float %x) {
-; CHECK-NOFPRCVT-LABEL: fptosi_i32_f32_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs s0, s0
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fptosi_i32_f32_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs s0, s0
-; CHECK-NEXT: ret
- %val = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict")
- %bc = bitcast i32 %val to float
- ret float %bc
-}
-
-
-
-define float @fptoui_i32_f16_simd(half %x) {
-; CHECK-NOFPRCVT-LABEL: fptoui_i32_f16_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu w8, h0
-; CHECK-NOFPRCVT-NEXT: fmov s0, w8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fptoui_i32_f16_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu s0, h0
-; CHECK-NEXT: ret
- %val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict")
- %sum = bitcast i32 %val to float
- ret float %sum
-}
-
-define double @fptoui_i64_f16_simd(half %x) {
-; CHECK-NOFPRCVT-LABEL: fptoui_i64_f16_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu x8, h0
-; CHECK-NOFPRCVT-NEXT: fmov d0, x8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fptoui_i64_f16_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu d0, h0
-; CHECK-NEXT: ret
- %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict")
- %sum = bitcast i64 %val to double
- ret double %sum
-}
-
-define double @fptoui_i64_f32_simd(float %x) {
-; CHECK-NOFPRCVT-LABEL: fptoui_i64_f32_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0
-; CHECK-NOFPRCVT-NEXT: fmov d0, x8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fptoui_i64_f32_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu d0, s0
-; CHECK-NEXT: ret
- %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict")
- %bc = bitcast i64 %val to double
- ret double %bc
-}
-
-define float @fptoui_i32_f64_simd(double %x) {
-; CHECK-NOFPRCVT-LABEL: fptoui_i32_f64_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu w8, d0
-; CHECK-NOFPRCVT-NEXT: fmov s0, w8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fptoui_i32_f64_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu s0, d0
-; CHECK-NEXT: ret
- %val = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict")
- %bc = bitcast i32 %val to float
- ret float %bc
-}
-
-define double @fptoui_i64_f64_simd(double %x) {
-; CHECK-NOFPRCVT-LABEL: fptoui_i64_f64_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu d0, d0
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fptoui_i64_f64_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu d0, d0
-; CHECK-NEXT: ret
- %val = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict")
- %bc = bitcast i64 %val to double
- ret double %bc
-}
-
-define float @fptoui_i32_f32_simd(float %x) {
-; CHECK-NOFPRCVT-LABEL: fptoui_i32_f32_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu s0, s0
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fptoui_i32_f32_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu s0, s0
-; CHECK-NEXT: ret
- %val = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict")
- %bc = bitcast i32 %val to float
- ret float %bc
-}
-
;
; FPTOI rounding
;
@@ -1950,164 +1739,378 @@ define double @fcvtzu_dd_simd(double %a) {
; FPTOI scalar_to_vector
;
-define <1 x i64> @fcvtzs_scalar_to_vector_h(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_h:
+define <2 x i32> @fcvtzs_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs w8, h0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, h0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = fptosi half %a to i32
+ %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
+ ret <2 x i32> %fcvtzs_vector
+}
+
+define <2 x i32> @fcvtzs_v2i32_from_f32_scalar_to_vector_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs s0, s0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = fptosi float %a to i32
+ %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
+ ret <2 x i32> %fcvtzs_vector
+}
+
+define <2 x i32> @fcvtzs_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs w8, d0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, d0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = fptosi double %a to i32
+ %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
+ ret <2 x i32> %fcvtzs_vector
+}
+
+define <4 x i32> @fcvtzs_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs w8, h0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, h0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = fptosi half %a to i32
+ %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
+ ret <4 x i32> %fcvtzs_vector
+}
+
+define <4 x i32> @fcvtzs_v4i32_from_f32_scalar_to_vector_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs s0, s0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = fptosi float %a to i32
+ %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
+ ret <4 x i32> %fcvtzs_vector
+}
+
+define <4 x i32> @fcvtzs_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzs w8, d0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzs_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, d0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = fptosi double %a to i32
+ %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
+ ret <4 x i32> %fcvtzs_vector
+}
+
+define <1 x i64> @fcvtzs_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v1i64_from_f16_scalar_to_vector_simd:
; CHECK-NOFPRCVT: // %bb.0:
; CHECK-NOFPRCVT-NEXT: fcvtzs x8, h0
; CHECK-NOFPRCVT-NEXT: fmov d0, x8
; CHECK-NOFPRCVT-NEXT: ret
;
-; CHECK-LABEL: fcvtzs_scalar_to_vector_h:
+; CHECK-LABEL: fcvtzs_v1i64_from_f16_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzs d0, h0
; CHECK-NEXT: ret
- %val = fptosi half %a to i64
- %vec = insertelement <1 x i64> poison, i64 %val, i32 0
- ret <1 x i64> %vec
+ %fcvtzs_scalar = fptosi half %a to i64
+ %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
+ ret <1 x i64> %fcvtzs_vector
}
-define <1 x i64> @fcvtzs_scalar_to_vector_s(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_s:
+define <1 x i64> @fcvtzs_v1i64_from_f32_scalar_to_vector_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v1i64_from_f32_scalar_to_vector_simd:
; CHECK-NOFPRCVT: // %bb.0:
; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0
; CHECK-NOFPRCVT-NEXT: fmov d0, x8
; CHECK-NOFPRCVT-NEXT: ret
;
-; CHECK-LABEL: fcvtzs_scalar_to_vector_s:
+; CHECK-LABEL: fcvtzs_v1i64_from_f32_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzs d0, s0
; CHECK-NEXT: ret
- %val = fptosi float %a to i64
- %vec = insertelement <1 x i64> poison, i64 %val, i32 0
- ret <1 x i64> %vec
+ %fcvtzs_scalar = fptosi float %a to i64
+ %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
+ ret <1 x i64> %fcvtzs_vector
}
-define <1 x i64> @fcvtzs_scalar_to_vector_d(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_d:
+define <1 x i64> @fcvtzs_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v1i64_from_f64_scalar_to_vector_simd:
; CHECK-NOFPRCVT: // %bb.0:
; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0
; CHECK-NOFPRCVT-NEXT: ret
;
-; CHECK-LABEL: fcvtzs_scalar_to_vector_d:
+; CHECK-LABEL: fcvtzs_v1i64_from_f64_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzs d0, d0
; CHECK-NEXT: ret
- %val = fptosi double %a to i64
- %vec = insertelement <1 x i64> poison, i64 %val, i32 0
- ret <1 x i64> %vec
+ %fcvtzs_scalar = fptosi double %a to i64
+ %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
+ ret <1 x i64> %fcvtzs_vector
}
-define <1 x i64> @fcvtzu_scalar_to_vector_h(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_h:
+define <2 x i64> @fcvtzs_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i64_from_f16_scalar_to_vector_simd:
; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu x8, h0
+; CHECK-NOFPRCVT-NEXT: fcvtzs x8, h0
; CHECK-NOFPRCVT-NEXT: fmov d0, x8
; CHECK-NOFPRCVT-NEXT: ret
;
-; CHECK-LABEL: fcvtzu_scalar_to_vector_h:
+; CHECK-LABEL: fcvtzs_v2i64_from_f16_scalar_to_vector_simd:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu d0, h0
+; CHECK-NEXT: fcvtzs d0, h0
; CHECK-NEXT: ret
- %val = fptoui half %a to i64
- %vec = insertelement <1 x i64> poison, i64 %val, i32 0
- ret <1 x i64> %vec
+ %fcvtzs_scalar = fptosi half %a to i64
+ %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
+ ret <2 x i64> %fcvtzs_vector
}
-define <1 x i64> @fcvtzu_scalar_to_vector_s(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_s:
+define <2 x i64> @fcvtzs_v2i64_from_f32_scalar_to_vector_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i64_from_f32_scalar_to_vector_simd:
; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0
+; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0
; CHECK-NOFPRCVT-NEXT: fmov d0, x8
; CHECK-NOFPRCVT-NEXT: ret
;
-; CHECK-LABEL: fcvtzu_scalar_to_vector_s:
+; CHECK-LABEL: fcvtzs_v2i64_from_f32_scalar_to_vector_simd:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu d0, s0
+; CHECK-NEXT: fcvtzs d0, s0
; CHECK-NEXT: ret
- %val = fptoui float %a to i64
- %vec = insertelement <1 x i64> poison, i64 %val, i32 0
- ret <1 x i64> %vec
+ %fcvtzs_scalar = fptosi float %a to i64
+ %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
+ ret <2 x i64> %fcvtzs_vector
}
-define <1 x i64> @fcvtzu_scalar_to_vector_d(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_d:
+define <2 x i64> @fcvtzs_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i64_from_f64_scalar_to_vector_simd:
; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu d0, d0
+; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0
; CHECK-NOFPRCVT-NEXT: ret
;
-; CHECK-LABEL: fcvtzu_scalar_to_vector_d:
+; CHECK-LABEL: fcvtzs_v2i64_from_f64_scalar_to_vector_simd:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu d0, d0
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = fptosi double %a to i64
+ %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
+ ret <2 x i64> %fcvtzs_vector
+}
+
+define <2 x i32> @fcvtzu_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu w8, h0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, h0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = fptoui half %a to i32
+ %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
+ ret <2 x i32> %fcvtzu_vector
+}
+
+define <2 x i32> @fcvtzu_v2i32_from_f32_scalar_to_vector_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu s0, s0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, s0
; CHECK-NEXT: ret
- %val = fptoui double %a to i64
- %vec = insertelement <1 x i64> poison, i64 %val, i32 0
- ret <1 x i64> %vec
+ %fcvtzu_scalar = fptoui float %a to i32
+ %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
+ ret <2 x i32> %fcvtzu_vector
}
+define <2 x i32> @fcvtzu_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu w8, d0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
;
-; FPTOI scalar_to_vector strictfp
+; CHECK-LABEL: fcvtzu_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, d0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = fptoui double %a to i32
+ %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
+ ret <2 x i32> %fcvtzu_vector
+}
+
+define <4 x i32> @fcvtzu_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu w8, h0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
;
+; CHECK-LABEL: fcvtzu_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, h0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = fptoui half %a to i32
+ %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
+ ret <4 x i32> %fcvtzu_vector
+}
-define <1 x i64> @fcvtzs_scalar_to_vector_h_strict(half %x) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_h_strict:
+define <4 x i32> @fcvtzu_v4i32_from_f32_scalar_to_vector_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v4i32_from_f32_scalar_to_vector_simd:
; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs x8, h0
+; CHECK-NOFPRCVT-NEXT: fcvtzu s0, s0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, s0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = fptoui float %a to i32
+ %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
+ ret <4 x i32> %fcvtzu_vector
+}
+
+define <4 x i32> @fcvtzu_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu w8, d0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, d0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = fptoui double %a to i32
+ %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
+ ret <4 x i32> %fcvtzu_vector
+}
+
+define <1 x i64> @fcvtzu_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu x8, h0
; CHECK-NOFPRCVT-NEXT: fmov d0, x8
; CHECK-NOFPRCVT-NEXT: ret
;
-; CHECK-LABEL: fcvtzs_scalar_to_vector_h_strict:
+; CHECK-LABEL: fcvtzu_v1i64_from_f16_scalar_to_vector_simd:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs d0, h0
+; CHECK-NEXT: fcvtzu d0, h0
; CHECK-NEXT: ret
- %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict")
- %vec = insertelement <1 x i64> poison, i64 %val, i32 0
- ret <1 x i64> %vec
+ %fcvtzu_scalar = fptoui half %a to i64
+ %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
+ ret <1 x i64> %fcvtzu_vector
}
-define <1 x i64> @fcvtzs_scalar_to_vector_s_strict(float %x) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_s_strict:
+define <1 x i64> @fcvtzu_v1i64_from_f32_scalar_to_vector_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v1i64_from_f32_scalar_to_vector_simd:
; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0
+; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0
; CHECK-NOFPRCVT-NEXT: fmov d0, x8
; CHECK-NOFPRCVT-NEXT: ret
;
-; CHECK-LABEL: fcvtzs_scalar_to_vector_s_strict:
+; CHECK-LABEL: fcvtzu_v1i64_from_f32_scalar_to_vector_simd:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: fcvtzu d0, s0
; CHECK-NEXT: ret
- %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict")
- %vec = insertelement <1 x i64> poison, i64 %val, i32 0
- ret <1 x i64> %vec
+ %fcvtzu_scalar = fptoui float %a to i64
+ %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
+ ret <1 x i64> %fcvtzu_vector
}
-define <1 x i64> @fcvtzu_scalar_to_vector_h_strict(half %x) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_h_strict:
+define <1 x i64> @fcvtzu_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu d0, d0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, d0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = fptoui double %a to i64
+ %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
+ ret <1 x i64> %fcvtzu_vector
+}
+
+define <2 x i64> @fcvtzu_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i64_from_f16_scalar_to_vector_simd:
; CHECK-NOFPRCVT: // %bb.0:
; CHECK-NOFPRCVT-NEXT: fcvtzu x8, h0
; CHECK-NOFPRCVT-NEXT: fmov d0, x8
; CHECK-NOFPRCVT-NEXT: ret
;
-; CHECK-LABEL: fcvtzu_scalar_to_vector_h_strict:
+; CHECK-LABEL: fcvtzu_v2i64_from_f16_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu d0, h0
; CHECK-NEXT: ret
- %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict")
- %vec = insertelement <1 x i64> poison, i64 %val, i32 0
- ret <1 x i64> %vec
+ %fcvtzu_scalar = fptoui half %a to i64
+ %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
+ ret <2 x i64> %fcvtzu_vector
}
-define <1 x i64> @fcvtzu_scalar_to_vector_s_strict(float %x) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_s_strict:
+define <2 x i64> @fcvtzu_v2i64_from_f32_scalar_to_vector_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i64_from_f32_scalar_to_vector_simd:
; CHECK-NOFPRCVT: // %bb.0:
; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0
; CHECK-NOFPRCVT-NEXT: fmov d0, x8
; CHECK-NOFPRCVT-NEXT: ret
;
-; CHECK-LABEL: fcvtzu_scalar_to_vector_s_strict:
+; CHECK-LABEL: fcvtzu_v2i64_from_f32_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu d0, s0
; CHECK-NEXT: ret
- %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict")
- %vec = insertelement <1 x i64> poison, i64 %val, i32 0
- ret <1 x i64> %vec
+ %fcvtzu_scalar = fptoui float %a to i64
+ %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
+ ret <2 x i64> %fcvtzu_vector
+}
+
+define <2 x i64> @fcvtzu_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i64_from_f64_scalar_to_vector_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtzu d0, d0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtzu_v2i64_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, d0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = fptoui double %a to i64
+ %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
+ ret <2 x i64> %fcvtzu_vector
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
index 68c24f2a30709..55af566b9f4c1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
@@ -612,330 +612,1202 @@ define float @fcvtzu_1s1s_simd(float %a) {
; Intriniscs (scalar_to_vector)
;
-define <1 x i64> @fcvtas_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtas_1d1s_scalar_to_vector_simd:
+define <2 x i32> @fcvtas_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtas_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas s0, h0
+; CHECK-NEXT: ret
+ %fcvtas_scalar = tail call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a)
+ %fcvtas_vector = insertelement <2 x i32> poison, i32 %fcvtas_scalar, i32 0
+ ret <2 x i32> %fcvtas_vector
+}
+
+define <2 x i32> @fcvtas_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtas_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas s0, s0
+; CHECK-NEXT: ret
+ %fcvtas_scalar = call i32 @llvm.aarch64.neon.fcvtas.i32.f32(float %A)
+ %fcvtas_vector = insertelement <2 x i32> poison, i32 %fcvtas_scalar, i32 0
+ ret <2 x i32> %fcvtas_vector
+}
+
+define <2 x i32> @fcvtas_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtas_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas s0, d0
+; CHECK-NEXT: ret
+ %fcvtas_scalar = tail call i32 @llvm.aarch64.neon.fcvtas.i32.f64(double %a)
+ %fcvtas_vector = insertelement <2 x i32> poison, i32 %fcvtas_scalar, i32 0
+ ret <2 x i32> %fcvtas_vector
+}
+
+define <4 x i32> @fcvtas_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtas_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas s0, h0
+; CHECK-NEXT: ret
+ %fcvtas_scalar = tail call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a)
+ %fcvtas_vector = insertelement <4 x i32> poison, i32 %fcvtas_scalar, i32 0
+ ret <4 x i32> %fcvtas_vector
+}
+
+define <4 x i32> @fcvtas_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtas_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas s0, s0
+; CHECK-NEXT: ret
+ %fcvtas_scalar = call i32 @llvm.aarch64.neon.fcvtas.i32.f32(float %A)
+ %fcvtas_vector = insertelement <4 x i32> poison, i32 %fcvtas_scalar, i32 0
+ ret <4 x i32> %fcvtas_vector
+}
+
+define <4 x i32> @fcvtas_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtas_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas s0, d0
+; CHECK-NEXT: ret
+ %fcvtas_scalar = tail call i32 @llvm.aarch64.neon.fcvtas.i32.f64(double %a)
+ %fcvtas_vector = insertelement <4 x i32> poison, i32 %fcvtas_scalar, i32 0
+ ret <4 x i32> %fcvtas_vector
+}
+
+define <1 x i64> @fcvtas_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtas_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas d0, h0
+; CHECK-NEXT: ret
+ %fcvtas_scalar = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f16(half %a)
+ %fcvtas_vector = insertelement <1 x i64> poison, i64 %fcvtas_scalar, i32 0
+ ret <1 x i64> %fcvtas_vector
+}
+
+define <1 x i64> @fcvtas_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtas_v1i64_from_f32_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtas d0, s0
; CHECK-NEXT: ret
- %i = call i64 @llvm.aarch64.neon.fcvtas.i64.f32(float %A)
- %vec = insertelement <1 x i64> poison, i64 %i, i32 0
- ret <1 x i64> %vec
+ %fcvtas_scalar = call i64 @llvm.aarch64.neon.fcvtas.i64.f32(float %A)
+ %fcvtas_vector = insertelement <1 x i64> poison, i64 %fcvtas_scalar, i32 0
+ ret <1 x i64> %fcvtas_vector
}
+define <1 x i64> @fcvtas_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtas_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas d0, d0
+; CHECK-NEXT: ret
+ %fcvtas_scalar = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double %a)
+ %fcvtas_vector = insertelement <1 x i64> poison, i64 %fcvtas_scalar, i32 0
+ ret <1 x i64> %fcvtas_vector
+}
-define <1 x i64> @fcvtas_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtas_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtas_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtas_v2i64_from_f16_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtas d0, h0
; CHECK-NEXT: ret
- %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f16(half %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
- ret <1 x i64> %vec
+ %fcvtas_scalar = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f16(half %a)
+ %fcvtas_vector = insertelement <2 x i64> poison, i64 %fcvtas_scalar, i32 0
+ ret <2 x i64> %fcvtas_vector
+}
+
+define <2 x i64> @fcvtas_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtas_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas d0, s0
+; CHECK-NEXT: ret
+ %fcvtas_scalar = call i64 @llvm.aarch64.neon.fcvtas.i64.f32(float %A)
+ %fcvtas_vector = insertelement <2 x i64> poison, i64 %fcvtas_scalar, i32 0
+ ret <2 x i64> %fcvtas_vector
}
-define <1 x i64> @fcvtas_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtas_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtas_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtas_v2i64_from_f64_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtas d0, d0
; CHECK-NEXT: ret
- %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
- ret <1 x i64> %vec
+ %fcvtas_scalar = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double %a)
+ %fcvtas_vector = insertelement <2 x i64> poison, i64 %fcvtas_scalar, i32 0
+ ret <2 x i64> %fcvtas_vector
}
+define <2 x i32> @fcvtau_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtau_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau s0, h0
+; CHECK-NEXT: ret
+ %fcvtau_scalar = tail call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a)
+ %fcvtau_vector = insertelement <2 x i32> poison, i32 %fcvtau_scalar, i32 0
+ ret <2 x i32> %fcvtau_vector
+}
+define <2 x i32> @fcvtau_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtau_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau s0, s0
+; CHECK-NEXT: ret
+ %fcvtau_scalar = call i32 @llvm.aarch64.neon.fcvtau.i32.f32(float %A)
+ %fcvtau_vector = insertelement <2 x i32> poison, i32 %fcvtau_scalar, i32 0
+ ret <2 x i32> %fcvtau_vector
+}
-define <1 x i64> @fcvtau_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtau_1d1s_scalar_to_vector_simd:
+define <2 x i32> @fcvtau_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtau_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau s0, d0
+; CHECK-NEXT: ret
+ %fcvtau_scalar = tail call i32 @llvm.aarch64.neon.fcvtau.i32.f64(double %a)
+ %fcvtau_vector = insertelement <2 x i32> poison, i32 %fcvtau_scalar, i32 0
+ ret <2 x i32> %fcvtau_vector
+}
+
+define <4 x i32> @fcvtau_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtau_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau s0, h0
+; CHECK-NEXT: ret
+ %fcvtau_scalar = tail call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a)
+ %fcvtau_vector = insertelement <4 x i32> poison, i32 %fcvtau_scalar, i32 0
+ ret <4 x i32> %fcvtau_vector
+}
+
+define <4 x i32> @fcvtau_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtau_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau s0, s0
+; CHECK-NEXT: ret
+ %fcvtau_scalar = call i32 @llvm.aarch64.neon.fcvtau.i32.f32(float %A)
+ %fcvtau_vector = insertelement <4 x i32> poison, i32 %fcvtau_scalar, i32 0
+ ret <4 x i32> %fcvtau_vector
+}
+
+define <4 x i32> @fcvtau_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtau_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau s0, d0
+; CHECK-NEXT: ret
+ %fcvtau_scalar = tail call i32 @llvm.aarch64.neon.fcvtau.i32.f64(double %a)
+ %fcvtau_vector = insertelement <4 x i32> poison, i32 %fcvtau_scalar, i32 0
+ ret <4 x i32> %fcvtau_vector
+}
+
+define <1 x i64> @fcvtau_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtau_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau d0, h0
+; CHECK-NEXT: ret
+ %fcvtau_scalar = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f16(half %a)
+ %fcvtau_vector = insertelement <1 x i64> poison, i64 %fcvtau_scalar, i32 0
+ ret <1 x i64> %fcvtau_vector
+}
+
+define <1 x i64> @fcvtau_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtau_v1i64_from_f32_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtau d0, s0
; CHECK-NEXT: ret
- %i = call i64 @llvm.aarch64.neon.fcvtau.i64.f32(float %A)
- %vec = insertelement <1 x i64> poison, i64 %i, i32 0
- ret <1 x i64> %vec
+ %fcvtau_scalar = call i64 @llvm.aarch64.neon.fcvtau.i64.f32(float %A)
+ %fcvtau_vector = insertelement <1 x i64> poison, i64 %fcvtau_scalar, i32 0
+ ret <1 x i64> %fcvtau_vector
}
+define <1 x i64> @fcvtau_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtau_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau d0, d0
+; CHECK-NEXT: ret
+ %fcvtau_scalar = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double %a)
+ %fcvtau_vector = insertelement <1 x i64> poison, i64 %fcvtau_scalar, i32 0
+ ret <1 x i64> %fcvtau_vector
+}
-define <1 x i64> @fcvtau_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtau_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtau_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtau_v2i64_from_f16_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtau d0, h0
; CHECK-NEXT: ret
- %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f16(half %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
- ret <1 x i64> %vec
+ %fcvtau_scalar = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f16(half %a)
+ %fcvtau_vector = insertelement <2 x i64> poison, i64 %fcvtau_scalar, i32 0
+ ret <2 x i64> %fcvtau_vector
}
-define <1 x i64> @fcvtau_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtau_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtau_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtau_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau d0, s0
+; CHECK-NEXT: ret
+ %fcvtau_scalar = call i64 @llvm.aarch64.neon.fcvtau.i64.f32(float %A)
+ %fcvtau_vector = insertelement <2 x i64> poison, i64 %fcvtau_scalar, i32 0
+ ret <2 x i64> %fcvtau_vector
+}
+
+define <2 x i64> @fcvtau_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtau_v2i64_from_f64_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtau d0, d0
; CHECK-NEXT: ret
- %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
- ret <1 x i64> %vec
+ %fcvtau_scalar = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double %a)
+ %fcvtau_vector = insertelement <2 x i64> poison, i64 %fcvtau_scalar, i32 0
+ ret <2 x i64> %fcvtau_vector
+}
+
+define <2 x i32> @fcvtms_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtms_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms s0, h0
+; CHECK-NEXT: ret
+ %fcvtms_scalar = tail call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a)
+ %fcvtms_vector = insertelement <2 x i32> poison, i32 %fcvtms_scalar, i32 0
+ ret <2 x i32> %fcvtms_vector
+}
+
+define <2 x i32> @fcvtms_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtms_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms s0, s0
+; CHECK-NEXT: ret
+ %fcvtms_scalar = call i32 @llvm.aarch64.neon.fcvtms.i32.f32(float %A)
+ %fcvtms_vector = insertelement <2 x i32> poison, i32 %fcvtms_scalar, i32 0
+ ret <2 x i32> %fcvtms_vector
+}
+
+define <2 x i32> @fcvtms_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtms_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms s0, d0
+; CHECK-NEXT: ret
+ %fcvtms_scalar = tail call i32 @llvm.aarch64.neon.fcvtms.i32.f64(double %a)
+ %fcvtms_vector = insertelement <2 x i32> poison, i32 %fcvtms_scalar, i32 0
+ ret <2 x i32> %fcvtms_vector
+}
+
+define <4 x i32> @fcvtms_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtms_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms s0, h0
+; CHECK-NEXT: ret
+ %fcvtms_scalar = tail call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a)
+ %fcvtms_vector = insertelement <4 x i32> poison, i32 %fcvtms_scalar, i32 0
+ ret <4 x i32> %fcvtms_vector
+}
+
+define <4 x i32> @fcvtms_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtms_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms s0, s0
+; CHECK-NEXT: ret
+ %fcvtms_scalar = call i32 @llvm.aarch64.neon.fcvtms.i32.f32(float %A)
+ %fcvtms_vector = insertelement <4 x i32> poison, i32 %fcvtms_scalar, i32 0
+ ret <4 x i32> %fcvtms_vector
}
+define <4 x i32> @fcvtms_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtms_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms s0, d0
+; CHECK-NEXT: ret
+ %fcvtms_scalar = tail call i32 @llvm.aarch64.neon.fcvtms.i32.f64(double %a)
+ %fcvtms_vector = insertelement <4 x i32> poison, i32 %fcvtms_scalar, i32 0
+ ret <4 x i32> %fcvtms_vector
+}
+define <1 x i64> @fcvtms_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtms_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms d0, h0
+; CHECK-NEXT: ret
+ %fcvtms_scalar = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a)
+ %fcvtms_vector = insertelement <1 x i64> poison, i64 %fcvtms_scalar, i32 0
+ ret <1 x i64> %fcvtms_vector
+}
-define <1 x i64> @fcvtms_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtms_1d1s_scalar_to_vector_simd:
+define <1 x i64> @fcvtms_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtms_v1i64_from_f32_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtms d0, s0
; CHECK-NEXT: ret
- %i = call i64 @llvm.aarch64.neon.fcvtms.i64.f32(float %A)
- %vec = insertelement <1 x i64> poison, i64 %i, i32 0
- ret <1 x i64> %vec
+ %fcvtms_scalar = call i64 @llvm.aarch64.neon.fcvtms.i64.f32(float %A)
+ %fcvtms_vector = insertelement <1 x i64> poison, i64 %fcvtms_scalar, i32 0
+ ret <1 x i64> %fcvtms_vector
}
+define <1 x i64> @fcvtms_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtms_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms d0, d0
+; CHECK-NEXT: ret
+ %fcvtms_scalar = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double %a)
+ %fcvtms_vector = insertelement <1 x i64> poison, i64 %fcvtms_scalar, i32 0
+ ret <1 x i64> %fcvtms_vector
+}
-define <1 x i64> @fcvtms_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtms_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtms_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtms_v2i64_from_f16_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtms d0, h0
; CHECK-NEXT: ret
- %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
- ret <1 x i64> %vec
+ %fcvtms_scalar = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a)
+ %fcvtms_vector = insertelement <2 x i64> poison, i64 %fcvtms_scalar, i32 0
+ ret <2 x i64> %fcvtms_vector
+}
+
+define <2 x i64> @fcvtms_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtms_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms d0, s0
+; CHECK-NEXT: ret
+ %fcvtms_scalar = call i64 @llvm.aarch64.neon.fcvtms.i64.f32(float %A)
+ %fcvtms_vector = insertelement <2 x i64> poison, i64 %fcvtms_scalar, i32 0
+ ret <2 x i64> %fcvtms_vector
}
-define <1 x i64> @fcvtms_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtms_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtms_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtms_v2i64_from_f64_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtms d0, d0
; CHECK-NEXT: ret
- %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
- ret <1 x i64> %vec
+ %fcvtms_scalar = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double %a)
+ %fcvtms_vector = insertelement <2 x i64> poison, i64 %fcvtms_scalar, i32 0
+ ret <2 x i64> %fcvtms_vector
}
+define <2 x i32> @fcvtmu_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtmu_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtmu s0, h0
+; CHECK-NEXT: ret
+ %fcvtmu_scalar = tail call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a)
+ %fcvtmu_vector = insertelement <2 x i32> poison, i32 %fcvtmu_scalar, i32 0
+ ret <2 x i32> %fcvtmu_vector
+}
+define <2 x i32> @fcvtmu_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtmu_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtmu s0, s0
+; CHECK-NEXT: ret
+ %fcvtmu_scalar = call i32 @llvm.aarch64.neon.fcvtmu.i32.f32(float %A)
+ %fcvtmu_vector = insertelement <2 x i32> poison, i32 %fcvtmu_scalar, i32 0
+ ret <2 x i32> %fcvtmu_vector
+}
+
+define <2 x i32> @fcvtmu_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtmu_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtmu s0, d0
+; CHECK-NEXT: ret
+ %fcvtmu_scalar = tail call i32 @llvm.aarch64.neon.fcvtmu.i32.f64(double %a)
+ %fcvtmu_vector = insertelement <2 x i32> poison, i32 %fcvtmu_scalar, i32 0
+ ret <2 x i32> %fcvtmu_vector
+}
+
+define <4 x i32> @fcvtmu_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtmu_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtmu s0, h0
+; CHECK-NEXT: ret
+ %fcvtmu_scalar = tail call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a)
+ %fcvtmu_vector = insertelement <4 x i32> poison, i32 %fcvtmu_scalar, i32 0
+ ret <4 x i32> %fcvtmu_vector
+}
+
+define <4 x i32> @fcvtmu_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtmu_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtmu s0, s0
+; CHECK-NEXT: ret
+ %fcvtmu_scalar = call i32 @llvm.aarch64.neon.fcvtmu.i32.f32(float %A)
+ %fcvtmu_vector = insertelement <4 x i32> poison, i32 %fcvtmu_scalar, i32 0
+ ret <4 x i32> %fcvtmu_vector
+}
+
+define <4 x i32> @fcvtmu_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtmu_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtmu s0, d0
+; CHECK-NEXT: ret
+ %fcvtmu_scalar = tail call i32 @llvm.aarch64.neon.fcvtmu.i32.f64(double %a)
+ %fcvtmu_vector = insertelement <4 x i32> poison, i32 %fcvtmu_scalar, i32 0
+ ret <4 x i32> %fcvtmu_vector
+}
+
+define <1 x i64> @fcvtmu_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtmu_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtmu d0, h0
+; CHECK-NEXT: ret
+ %fcvtmu_scalar = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half %a)
+ %fcvtmu_vector = insertelement <1 x i64> poison, i64 %fcvtmu_scalar, i32 0
+ ret <1 x i64> %fcvtmu_vector
+}
-define <1 x i64> @fcvtmu_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtmu_1d1s_scalar_to_vector_simd:
+define <1 x i64> @fcvtmu_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtmu_v1i64_from_f32_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtmu d0, s0
; CHECK-NEXT: ret
- %i = call i64 @llvm.aarch64.neon.fcvtmu.i64.f32(float %A)
- %vec = insertelement <1 x i64> poison, i64 %i, i32 0
- ret <1 x i64> %vec
+ %fcvtmu_scalar = call i64 @llvm.aarch64.neon.fcvtmu.i64.f32(float %A)
+ %fcvtmu_vector = insertelement <1 x i64> poison, i64 %fcvtmu_scalar, i32 0
+ ret <1 x i64> %fcvtmu_vector
}
+define <1 x i64> @fcvtmu_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtmu_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtmu d0, d0
+; CHECK-NEXT: ret
+ %fcvtmu_scalar = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double %a)
+ %fcvtmu_vector = insertelement <1 x i64> poison, i64 %fcvtmu_scalar, i32 0
+ ret <1 x i64> %fcvtmu_vector
+}
-define <1 x i64> @fcvtmu_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtmu_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtmu_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtmu_v2i64_from_f16_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtmu d0, h0
; CHECK-NEXT: ret
- %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
- ret <1 x i64> %vec
+ %fcvtmu_scalar = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half %a)
+ %fcvtmu_vector = insertelement <2 x i64> poison, i64 %fcvtmu_scalar, i32 0
+ ret <2 x i64> %fcvtmu_vector
+}
+
+define <2 x i64> @fcvtmu_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtmu_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtmu d0, s0
+; CHECK-NEXT: ret
+ %fcvtmu_scalar = call i64 @llvm.aarch64.neon.fcvtmu.i64.f32(float %A)
+ %fcvtmu_vector = insertelement <2 x i64> poison, i64 %fcvtmu_scalar, i32 0
+ ret <2 x i64> %fcvtmu_vector
}
-define <1 x i64> @fcvtmu_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtmu_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtmu_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtmu_v2i64_from_f64_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtmu d0, d0
; CHECK-NEXT: ret
- %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
- ret <1 x i64> %vec
+ %fcvtmu_scalar = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double %a)
+ %fcvtmu_vector = insertelement <2 x i64> poison, i64 %fcvtmu_scalar, i32 0
+ ret <2 x i64> %fcvtmu_vector
+}
+
+define <2 x i32> @fcvtns_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtns_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns s0, h0
+; CHECK-NEXT: ret
+ %fcvtns_scalar = tail call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a)
+ %fcvtns_vector = insertelement <2 x i32> poison, i32 %fcvtns_scalar, i32 0
+ ret <2 x i32> %fcvtns_vector
+}
+
+define <2 x i32> @fcvtns_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtns_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns s0, s0
+; CHECK-NEXT: ret
+ %fcvtns_scalar = call i32 @llvm.aarch64.neon.fcvtns.i32.f32(float %A)
+ %fcvtns_vector = insertelement <2 x i32> poison, i32 %fcvtns_scalar, i32 0
+ ret <2 x i32> %fcvtns_vector
+}
+
+define <2 x i32> @fcvtns_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtns_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns s0, d0
+; CHECK-NEXT: ret
+ %fcvtns_scalar = tail call i32 @llvm.aarch64.neon.fcvtns.i32.f64(double %a)
+ %fcvtns_vector = insertelement <2 x i32> poison, i32 %fcvtns_scalar, i32 0
+ ret <2 x i32> %fcvtns_vector
+}
+
+define <4 x i32> @fcvtns_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtns_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns s0, h0
+; CHECK-NEXT: ret
+ %fcvtns_scalar = tail call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a)
+ %fcvtns_vector = insertelement <4 x i32> poison, i32 %fcvtns_scalar, i32 0
+ ret <4 x i32> %fcvtns_vector
+}
+
+define <4 x i32> @fcvtns_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtns_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns s0, s0
+; CHECK-NEXT: ret
+ %fcvtns_scalar = call i32 @llvm.aarch64.neon.fcvtns.i32.f32(float %A)
+ %fcvtns_vector = insertelement <4 x i32> poison, i32 %fcvtns_scalar, i32 0
+ ret <4 x i32> %fcvtns_vector
}
+define <4 x i32> @fcvtns_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtns_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns s0, d0
+; CHECK-NEXT: ret
+ %fcvtns_scalar = tail call i32 @llvm.aarch64.neon.fcvtns.i32.f64(double %a)
+ %fcvtns_vector = insertelement <4 x i32> poison, i32 %fcvtns_scalar, i32 0
+ ret <4 x i32> %fcvtns_vector
+}
+define <1 x i64> @fcvtns_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtns_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns d0, h0
+; CHECK-NEXT: ret
+ %fcvtns_scalar = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f16(half %a)
+ %fcvtns_vector = insertelement <1 x i64> poison, i64 %fcvtns_scalar, i32 0
+ ret <1 x i64> %fcvtns_vector
+}
-define <1 x i64> @fcvtns_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtns_1d1s_scalar_to_vector_simd:
+define <1 x i64> @fcvtns_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtns_v1i64_from_f32_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtns d0, s0
; CHECK-NEXT: ret
- %i = call i64 @llvm.aarch64.neon.fcvtns.i64.f32(float %A)
- %vec = insertelement <1 x i64> poison, i64 %i, i32 0
- ret <1 x i64> %vec
+ %fcvtns_scalar = call i64 @llvm.aarch64.neon.fcvtns.i64.f32(float %A)
+ %fcvtns_vector = insertelement <1 x i64> poison, i64 %fcvtns_scalar, i32 0
+ ret <1 x i64> %fcvtns_vector
}
+define <1 x i64> @fcvtns_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtns_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns d0, d0
+; CHECK-NEXT: ret
+ %fcvtns_scalar = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double %a)
+ %fcvtns_vector = insertelement <1 x i64> poison, i64 %fcvtns_scalar, i32 0
+ ret <1 x i64> %fcvtns_vector
+}
-define <1 x i64> @fcvtns_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtns_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtns_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtns_v2i64_from_f16_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtns d0, h0
; CHECK-NEXT: ret
- %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f16(half %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
- ret <1 x i64> %vec
+ %fcvtns_scalar = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f16(half %a)
+ %fcvtns_vector = insertelement <2 x i64> poison, i64 %fcvtns_scalar, i32 0
+ ret <2 x i64> %fcvtns_vector
+}
+
+define <2 x i64> @fcvtns_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtns_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns d0, s0
+; CHECK-NEXT: ret
+ %fcvtns_scalar = call i64 @llvm.aarch64.neon.fcvtns.i64.f32(float %A)
+ %fcvtns_vector = insertelement <2 x i64> poison, i64 %fcvtns_scalar, i32 0
+ ret <2 x i64> %fcvtns_vector
}
-define <1 x i64> @fcvtns_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtns_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtns_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtns_v2i64_from_f64_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtns d0, d0
; CHECK-NEXT: ret
- %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
- ret <1 x i64> %vec
+ %fcvtns_scalar = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double %a)
+ %fcvtns_vector = insertelement <2 x i64> poison, i64 %fcvtns_scalar, i32 0
+ ret <2 x i64> %fcvtns_vector
+}
+
+define <2 x i32> @fcvtnu_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtnu_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu s0, h0
+; CHECK-NEXT: ret
+ %fcvtnu_scalar = tail call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a)
+ %fcvtnu_vector = insertelement <2 x i32> poison, i32 %fcvtnu_scalar, i32 0
+ ret <2 x i32> %fcvtnu_vector
}
+define <2 x i32> @fcvtnu_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtnu_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu s0, s0
+; CHECK-NEXT: ret
+ %fcvtnu_scalar = call i32 @llvm.aarch64.neon.fcvtnu.i32.f32(float %A)
+ %fcvtnu_vector = insertelement <2 x i32> poison, i32 %fcvtnu_scalar, i32 0
+ ret <2 x i32> %fcvtnu_vector
+}
+
+define <2 x i32> @fcvtnu_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtnu_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu s0, d0
+; CHECK-NEXT: ret
+ %fcvtnu_scalar = tail call i32 @llvm.aarch64.neon.fcvtnu.i32.f64(double %a)
+ %fcvtnu_vector = insertelement <2 x i32> poison, i32 %fcvtnu_scalar, i32 0
+ ret <2 x i32> %fcvtnu_vector
+}
+define <4 x i32> @fcvtnu_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtnu_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu s0, h0
+; CHECK-NEXT: ret
+ %fcvtnu_scalar = tail call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a)
+ %fcvtnu_vector = insertelement <4 x i32> poison, i32 %fcvtnu_scalar, i32 0
+ ret <4 x i32> %fcvtnu_vector
+}
+
+define <4 x i32> @fcvtnu_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtnu_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu s0, s0
+; CHECK-NEXT: ret
+ %fcvtnu_scalar = call i32 @llvm.aarch64.neon.fcvtnu.i32.f32(float %A)
+ %fcvtnu_vector = insertelement <4 x i32> poison, i32 %fcvtnu_scalar, i32 0
+ ret <4 x i32> %fcvtnu_vector
+}
+
+define <4 x i32> @fcvtnu_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtnu_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu s0, d0
+; CHECK-NEXT: ret
+ %fcvtnu_scalar = tail call i32 @llvm.aarch64.neon.fcvtnu.i32.f64(double %a)
+ %fcvtnu_vector = insertelement <4 x i32> poison, i32 %fcvtnu_scalar, i32 0
+ ret <4 x i32> %fcvtnu_vector
+}
-define <1 x i64> @fcvtnu_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtnu_1d1s_scalar_to_vector_simd:
+define <1 x i64> @fcvtnu_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtnu_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu d0, h0
+; CHECK-NEXT: ret
+ %fcvtnu_scalar = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half %a)
+ %fcvtnu_vector = insertelement <1 x i64> poison, i64 %fcvtnu_scalar, i32 0
+ ret <1 x i64> %fcvtnu_vector
+}
+
+define <1 x i64> @fcvtnu_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtnu_v1i64_from_f32_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtnu d0, s0
; CHECK-NEXT: ret
- %i = call i64 @llvm.aarch64.neon.fcvtnu.i64.f32(float %A)
- %vec = insertelement <1 x i64> poison, i64 %i, i32 0
- ret <1 x i64> %vec
+ %fcvtnu_scalar = call i64 @llvm.aarch64.neon.fcvtnu.i64.f32(float %A)
+ %fcvtnu_vector = insertelement <1 x i64> poison, i64 %fcvtnu_scalar, i32 0
+ ret <1 x i64> %fcvtnu_vector
}
+define <1 x i64> @fcvtnu_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtnu_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu d0, d0
+; CHECK-NEXT: ret
+ %fcvtnu_scalar = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double %a)
+ %fcvtnu_vector = insertelement <1 x i64> poison, i64 %fcvtnu_scalar, i32 0
+ ret <1 x i64> %fcvtnu_vector
+}
-define <1 x i64> @fcvtnu_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtnu_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtnu_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtnu_v2i64_from_f16_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtnu d0, h0
; CHECK-NEXT: ret
- %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
- ret <1 x i64> %vec
+ %fcvtnu_scalar = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half %a)
+ %fcvtnu_vector = insertelement <2 x i64> poison, i64 %fcvtnu_scalar, i32 0
+ ret <2 x i64> %fcvtnu_vector
+}
+
+define <2 x i64> @fcvtnu_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtnu_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu d0, s0
+; CHECK-NEXT: ret
+ %fcvtnu_scalar = call i64 @llvm.aarch64.neon.fcvtnu.i64.f32(float %A)
+ %fcvtnu_vector = insertelement <2 x i64> poison, i64 %fcvtnu_scalar, i32 0
+ ret <2 x i64> %fcvtnu_vector
}
-define <1 x i64> @fcvtnu_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtnu_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtnu_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtnu_v2i64_from_f64_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtnu d0, d0
; CHECK-NEXT: ret
- %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
- ret <1 x i64> %vec
+ %fcvtnu_scalar = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double %a)
+ %fcvtnu_vector = insertelement <2 x i64> poison, i64 %fcvtnu_scalar, i32 0
+ ret <2 x i64> %fcvtnu_vector
}
+define <2 x i32> @fcvtps_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtps_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps s0, h0
+; CHECK-NEXT: ret
+ %fcvtps_scalar = tail call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a)
+ %fcvtps_vector = insertelement <2 x i32> poison, i32 %fcvtps_scalar, i32 0
+ ret <2 x i32> %fcvtps_vector
+}
+define <2 x i32> @fcvtps_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtps_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps s0, s0
+; CHECK-NEXT: ret
+ %fcvtps_scalar = call i32 @llvm.aarch64.neon.fcvtps.i32.f32(float %A)
+ %fcvtps_vector = insertelement <2 x i32> poison, i32 %fcvtps_scalar, i32 0
+ ret <2 x i32> %fcvtps_vector
+}
-define <1 x i64> @fcvtps_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtps_1d1s_scalar_to_vector_simd:
+define <2 x i32> @fcvtps_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtps_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps s0, d0
+; CHECK-NEXT: ret
+ %fcvtps_scalar = tail call i32 @llvm.aarch64.neon.fcvtps.i32.f64(double %a)
+ %fcvtps_vector = insertelement <2 x i32> poison, i32 %fcvtps_scalar, i32 0
+ ret <2 x i32> %fcvtps_vector
+}
+
+define <4 x i32> @fcvtps_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtps_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps s0, h0
+; CHECK-NEXT: ret
+ %fcvtps_scalar = tail call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a)
+ %fcvtps_vector = insertelement <4 x i32> poison, i32 %fcvtps_scalar, i32 0
+ ret <4 x i32> %fcvtps_vector
+}
+
+define <4 x i32> @fcvtps_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtps_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps s0, s0
+; CHECK-NEXT: ret
+ %fcvtps_scalar = call i32 @llvm.aarch64.neon.fcvtps.i32.f32(float %A)
+ %fcvtps_vector = insertelement <4 x i32> poison, i32 %fcvtps_scalar, i32 0
+ ret <4 x i32> %fcvtps_vector
+}
+
+define <4 x i32> @fcvtps_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtps_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps s0, d0
+; CHECK-NEXT: ret
+ %fcvtps_scalar = tail call i32 @llvm.aarch64.neon.fcvtps.i32.f64(double %a)
+ %fcvtps_vector = insertelement <4 x i32> poison, i32 %fcvtps_scalar, i32 0
+ ret <4 x i32> %fcvtps_vector
+}
+
+define <1 x i64> @fcvtps_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtps_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps d0, h0
+; CHECK-NEXT: ret
+ %fcvtps_scalar = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f16(half %a)
+ %fcvtps_vector = insertelement <1 x i64> poison, i64 %fcvtps_scalar, i32 0
+ ret <1 x i64> %fcvtps_vector
+}
+
+define <1 x i64> @fcvtps_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtps_v1i64_from_f32_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtps d0, s0
; CHECK-NEXT: ret
- %i = call i64 @llvm.aarch64.neon.fcvtps.i64.f32(float %A)
- %vec = insertelement <1 x i64> poison, i64 %i, i32 0
- ret <1 x i64> %vec
+ %fcvtps_scalar = call i64 @llvm.aarch64.neon.fcvtps.i64.f32(float %A)
+ %fcvtps_vector = insertelement <1 x i64> poison, i64 %fcvtps_scalar, i32 0
+ ret <1 x i64> %fcvtps_vector
}
+define <1 x i64> @fcvtps_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtps_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps d0, d0
+; CHECK-NEXT: ret
+ %fcvtps_scalar = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double %a)
+ %fcvtps_vector = insertelement <1 x i64> poison, i64 %fcvtps_scalar, i32 0
+ ret <1 x i64> %fcvtps_vector
+}
-define <1 x i64> @fcvtps_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtps_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtps_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtps_v2i64_from_f16_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtps d0, h0
; CHECK-NEXT: ret
- %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f16(half %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
- ret <1 x i64> %vec
+ %fcvtps_scalar = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f16(half %a)
+ %fcvtps_vector = insertelement <2 x i64> poison, i64 %fcvtps_scalar, i32 0
+ ret <2 x i64> %fcvtps_vector
}
-define <1 x i64> @fcvtps_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtps_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtps_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtps_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps d0, s0
+; CHECK-NEXT: ret
+ %fcvtps_scalar = call i64 @llvm.aarch64.neon.fcvtps.i64.f32(float %A)
+ %fcvtps_vector = insertelement <2 x i64> poison, i64 %fcvtps_scalar, i32 0
+ ret <2 x i64> %fcvtps_vector
+}
+
+define <2 x i64> @fcvtps_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtps_v2i64_from_f64_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtps d0, d0
; CHECK-NEXT: ret
- %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
- ret <1 x i64> %vec
+ %fcvtps_scalar = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double %a)
+ %fcvtps_vector = insertelement <2 x i64> poison, i64 %fcvtps_scalar, i32 0
+ ret <2 x i64> %fcvtps_vector
+}
+
+define <2 x i32> @fcvtpu_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtpu_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu s0, h0
+; CHECK-NEXT: ret
+ %fcvtpu_scalar = tail call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a)
+ %fcvtpu_vector = insertelement <2 x i32> poison, i32 %fcvtpu_scalar, i32 0
+ ret <2 x i32> %fcvtpu_vector
+}
+
+define <2 x i32> @fcvtpu_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtpu_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu s0, s0
+; CHECK-NEXT: ret
+ %fcvtpu_scalar = call i32 @llvm.aarch64.neon.fcvtpu.i32.f32(float %A)
+ %fcvtpu_vector = insertelement <2 x i32> poison, i32 %fcvtpu_scalar, i32 0
+ ret <2 x i32> %fcvtpu_vector
+}
+
+define <2 x i32> @fcvtpu_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtpu_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu s0, d0
+; CHECK-NEXT: ret
+ %fcvtpu_scalar = tail call i32 @llvm.aarch64.neon.fcvtpu.i32.f64(double %a)
+ %fcvtpu_vector = insertelement <2 x i32> poison, i32 %fcvtpu_scalar, i32 0
+ ret <2 x i32> %fcvtpu_vector
+}
+
+define <4 x i32> @fcvtpu_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtpu_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu s0, h0
+; CHECK-NEXT: ret
+ %fcvtpu_scalar = tail call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a)
+ %fcvtpu_vector = insertelement <4 x i32> poison, i32 %fcvtpu_scalar, i32 0
+ ret <4 x i32> %fcvtpu_vector
+}
+
+define <4 x i32> @fcvtpu_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtpu_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu s0, s0
+; CHECK-NEXT: ret
+ %fcvtpu_scalar = call i32 @llvm.aarch64.neon.fcvtpu.i32.f32(float %A)
+ %fcvtpu_vector = insertelement <4 x i32> poison, i32 %fcvtpu_scalar, i32 0
+ ret <4 x i32> %fcvtpu_vector
}
+define <4 x i32> @fcvtpu_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtpu_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu s0, d0
+; CHECK-NEXT: ret
+ %fcvtpu_scalar = tail call i32 @llvm.aarch64.neon.fcvtpu.i32.f64(double %a)
+ %fcvtpu_vector = insertelement <4 x i32> poison, i32 %fcvtpu_scalar, i32 0
+ ret <4 x i32> %fcvtpu_vector
+}
+define <1 x i64> @fcvtpu_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtpu_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu d0, h0
+; CHECK-NEXT: ret
+ %fcvtpu_scalar = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half %a)
+ %fcvtpu_vector = insertelement <1 x i64> poison, i64 %fcvtpu_scalar, i32 0
+ ret <1 x i64> %fcvtpu_vector
+}
-define <1 x i64> @fcvtpu_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtpu_1d1s_scalar_to_vector_simd:
+define <1 x i64> @fcvtpu_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtpu_v1i64_from_f32_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtpu d0, s0
; CHECK-NEXT: ret
- %i = call i64 @llvm.aarch64.neon.fcvtpu.i64.f32(float %A)
- %vec = insertelement <1 x i64> poison, i64 %i, i32 0
- ret <1 x i64> %vec
+ %fcvtpu_scalar = call i64 @llvm.aarch64.neon.fcvtpu.i64.f32(float %A)
+ %fcvtpu_vector = insertelement <1 x i64> poison, i64 %fcvtpu_scalar, i32 0
+ ret <1 x i64> %fcvtpu_vector
}
+define <1 x i64> @fcvtpu_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtpu_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu d0, d0
+; CHECK-NEXT: ret
+ %fcvtpu_scalar = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double %a)
+ %fcvtpu_vector = insertelement <1 x i64> poison, i64 %fcvtpu_scalar, i32 0
+ ret <1 x i64> %fcvtpu_vector
+}
-define <1 x i64> @fcvtpu_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtpu_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtpu_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtpu_v2i64_from_f16_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtpu d0, h0
; CHECK-NEXT: ret
- %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
- ret <1 x i64> %vec
+ %fcvtpu_scalar = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half %a)
+ %fcvtpu_vector = insertelement <2 x i64> poison, i64 %fcvtpu_scalar, i32 0
+ ret <2 x i64> %fcvtpu_vector
+}
+
+define <2 x i64> @fcvtpu_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtpu_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu d0, s0
+; CHECK-NEXT: ret
+ %fcvtpu_scalar = call i64 @llvm.aarch64.neon.fcvtpu.i64.f32(float %A)
+ %fcvtpu_vector = insertelement <2 x i64> poison, i64 %fcvtpu_scalar, i32 0
+ ret <2 x i64> %fcvtpu_vector
}
-define <1 x i64> @fcvtpu_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtpu_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtpu_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtpu_v2i64_from_f64_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtpu d0, d0
; CHECK-NEXT: ret
- %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
- ret <1 x i64> %vec
+ %fcvtpu_scalar = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double %a)
+ %fcvtpu_vector = insertelement <2 x i64> poison, i64 %fcvtpu_scalar, i32 0
+ ret <2 x i64> %fcvtpu_vector
}
+define <2 x i32> @fcvtzs_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzs_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, h0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
+ %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
+ ret <2 x i32> %fcvtzs_vector
+}
+define <2 x i32> @fcvtzs_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzs_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %A)
+ %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
+ ret <2 x i32> %fcvtzs_vector
+}
+
+define <2 x i32> @fcvtzs_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzs_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, d0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f64(double %a)
+ %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
+ ret <2 x i32> %fcvtzs_vector
+}
+
+define <4 x i32> @fcvtzs_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzs_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, h0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
+ %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
+ ret <4 x i32> %fcvtzs_vector
+}
+
+define <4 x i32> @fcvtzs_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzs_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %A)
+ %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
+ ret <4 x i32> %fcvtzs_vector
+}
+
+define <4 x i32> @fcvtzs_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzs_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, d0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f64(double %a)
+ %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
+ ret <4 x i32> %fcvtzs_vector
+}
-define <1 x i64> @fcvtzs_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtzs_1d1s_scalar_to_vector_simd:
+define <1 x i64> @fcvtzs_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzs_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, h0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a)
+ %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
+ ret <1 x i64> %fcvtzs_vector
+}
+
+define <1 x i64> @fcvtzs_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzs_v1i64_from_f32_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzs d0, s0
; CHECK-NEXT: ret
- %i = call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %A)
- %vec = insertelement <1 x i64> poison, i64 %i, i32 0
- ret <1 x i64> %vec
+ %fcvtzs_scalar = call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %A)
+ %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
+ ret <1 x i64> %fcvtzs_vector
}
+define <1 x i64> @fcvtzs_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzs_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double %a)
+ %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
+ ret <1 x i64> %fcvtzs_vector
+}
-define <1 x i64> @fcvtzs_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtzs_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtzs_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzs_v2i64_from_f16_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzs d0, h0
; CHECK-NEXT: ret
- %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
- ret <1 x i64> %vec
+ %fcvtzs_scalar = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a)
+ %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
+ ret <2 x i64> %fcvtzs_vector
+}
+
+define <2 x i64> @fcvtzs_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzs_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: ret
+ %fcvtzs_scalar = call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %A)
+ %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
+ ret <2 x i64> %fcvtzs_vector
}
-define <1 x i64> @fcvtzs_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtzs_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtzs_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzs_v2i64_from_f64_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzs d0, d0
; CHECK-NEXT: ret
- %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
- ret <1 x i64> %vec
+ %fcvtzs_scalar = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double %a)
+ %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
+ ret <2 x i64> %fcvtzs_vector
+}
+
+define <2 x i32> @fcvtzu_v2i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzu_v2i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, h0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = tail call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
+ %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
+ ret <2 x i32> %fcvtzu_vector
+}
+
+define <2 x i32> @fcvtzu_v2i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzu_v2i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, s0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = call i32 @llvm.aarch64.neon.fcvtzu.i32.f32(float %A)
+ %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
+ ret <2 x i32> %fcvtzu_vector
+}
+
+define <2 x i32> @fcvtzu_v2i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzu_v2i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, d0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = tail call i32 @llvm.aarch64.neon.fcvtzu.i32.f64(double %a)
+ %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
+ ret <2 x i32> %fcvtzu_vector
+}
+
+define <4 x i32> @fcvtzu_v4i32_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzu_v4i32_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, h0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = tail call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
+ %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
+ ret <4 x i32> %fcvtzu_vector
+}
+
+define <4 x i32> @fcvtzu_v4i32_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzu_v4i32_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, s0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = call i32 @llvm.aarch64.neon.fcvtzu.i32.f32(float %A)
+ %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
+ ret <4 x i32> %fcvtzu_vector
}
+define <4 x i32> @fcvtzu_v4i32_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzu_v4i32_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, d0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = tail call i32 @llvm.aarch64.neon.fcvtzu.i32.f64(double %a)
+ %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
+ ret <4 x i32> %fcvtzu_vector
+}
+define <1 x i64> @fcvtzu_v1i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzu_v1i64_from_f16_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, h0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half %a)
+ %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
+ ret <1 x i64> %fcvtzu_vector
+}
-define <1 x i64> @fcvtzu_1d1s_scalar_to_vector_simd(float %A) nounwind {
-; CHECK-LABEL: fcvtzu_1d1s_scalar_to_vector_simd:
+define <1 x i64> @fcvtzu_v1i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzu_v1i64_from_f32_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu d0, s0
; CHECK-NEXT: ret
- %i = call i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float %A)
- %vec = insertelement <1 x i64> poison, i64 %i, i32 0
- ret <1 x i64> %vec
+ %fcvtzu_scalar = call i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float %A)
+ %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
+ ret <1 x i64> %fcvtzu_vector
}
+define <1 x i64> @fcvtzu_v1i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzu_v1i64_from_f64_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, d0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double %a)
+ %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
+ ret <1 x i64> %fcvtzu_vector
+}
-define <1 x i64> @fcvtzu_1d1h_scalar_to_vector_simd(half %a) {
-; CHECK-LABEL: fcvtzu_1d1h_scalar_to_vector_simd:
+define <2 x i64> @fcvtzu_v2i64_from_f16_scalar_to_vector_simd(half %a) {
+; CHECK-LABEL: fcvtzu_v2i64_from_f16_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu d0, h0
; CHECK-NEXT: ret
- %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f16, i32 0
- ret <1 x i64> %vec
+ %fcvtzu_scalar = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half %a)
+ %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
+ ret <2 x i64> %fcvtzu_vector
}
-define <1 x i64> @fcvtzu_1d1d_scalar_to_vector_simd(double %a) {
-; CHECK-LABEL: fcvtzu_1d1d_scalar_to_vector_simd:
+define <2 x i64> @fcvtzu_v2i64_from_f32_scalar_to_vector_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzu_v2i64_from_f32_scalar_to_vector_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, s0
+; CHECK-NEXT: ret
+ %fcvtzu_scalar = call i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float %A)
+ %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
+ ret <2 x i64> %fcvtzu_vector
+}
+
+define <2 x i64> @fcvtzu_v2i64_from_f64_scalar_to_vector_simd(double %a) {
+; CHECK-LABEL: fcvtzu_v2i64_from_f64_scalar_to_vector_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu d0, d0
; CHECK-NEXT: ret
- %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double %a)
- %vec = insertelement <1 x i64> poison, i64 %vcvtah_s64_f64, i32 0
- ret <1 x i64> %vec
+ %fcvtzu_scalar = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double %a)
+ %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
+ ret <2 x i64> %fcvtzu_vector
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
index dcb3b9b24627b..c70dac3f21a53 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -357,7 +357,6 @@ define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind {
ret <2 x i64> %tmp3
}
-; FIXME: Generate "fcvtzs d0, d0"?
define <1 x i64> @fcvtzs_1d(<1 x double> %A) nounwind {
; CHECK-LABEL: fcvtzs_1d:
; CHECK: // %bb.0:
>From dfe66a991fef1707961cd6dce4e7093422401019 Mon Sep 17 00:00:00 2001
From: Lukacma <Marian.Lukac at arm.com>
Date: Thu, 15 Jan 2026 15:03:51 +0000
Subject: [PATCH 4/8] Apply suggestion from @kmclaughlin-arm
Co-authored-by: Kerry McLaughlin <kerry.mclaughlin at arm.com>
---
llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
index 55af566b9f4c1..e0fe663bc625d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
@@ -609,7 +609,7 @@ define float @fcvtzu_1s1s_simd(float %a) {
}
;
-; Intriniscs (scalar_to_vector)
+; Intrinsics (scalar_to_vector)
;
define <2 x i32> @fcvtas_v2i32_from_f16_scalar_to_vector_simd(half %a) {
>From 5f5eb5c22b07714b5d3c5ed6d0e0426275d79b2d Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 21 Jan 2026 15:33:43 +0000
Subject: [PATCH 5/8] Update to use custom lowering of Scalar to Vector instead
of patterns
---
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 63 +
.../lib/Target/AArch64/AArch64InstrAtomics.td | 2 +-
.../lib/Target/AArch64/AArch64InstrFormats.td | 19 +-
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 124 +-
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 2 +-
.../AArch64/aarch64-matrix-umull-smull.ll | 8 +-
llvm/test/CodeGen/AArch64/aarch64-pmull2.ll | 4 +-
.../CodeGen/AArch64/arm64-cvt-simd-fptoi.ll | 380 ------
.../CodeGen/AArch64/arm64-neon-select_cc.ll | 24 +-
llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll | 56 +-
llvm/test/CodeGen/AArch64/arm64-vshift.ll | 9 +-
llvm/test/CodeGen/AArch64/avoid-pre-trunc.ll | 24 +-
.../AArch64/concat-vector-add-combine.ll | 20 +-
llvm/test/CodeGen/AArch64/ctpop.ll | 48 +-
.../test/CodeGen/AArch64/fptosi-sat-vector.ll | 17 +-
.../test/CodeGen/AArch64/fptoui-sat-vector.ll | 17 +-
llvm/test/CodeGen/AArch64/fsh.ll | 16 +-
.../AArch64/ragreedy-local-interval-cost.ll | 126 +-
.../AArch64/scalar-to-vector-bitcasts.ll | 45 +
llvm/test/CodeGen/AArch64/sext.ll | 42 +-
.../AArch64/sve-fixed-vector-llrint.ll | 365 +++---
.../CodeGen/AArch64/sve-fixed-vector-lrint.ll | 1039 ++++++++---------
llvm/test/CodeGen/AArch64/vector-llrint.ll | 17 +-
llvm/test/CodeGen/AArch64/vector-lrint.ll | 507 ++++----
llvm/test/CodeGen/AArch64/zext.ll | 38 +-
25 files changed, 1301 insertions(+), 1711 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/scalar-to-vector-bitcasts.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 54ad7beb823ac..b1377aeaaa69c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -61,6 +61,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
}
void Select(SDNode *Node) override;
+ void PreprocessISelDAG() override;
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
@@ -532,6 +533,28 @@ char AArch64DAGToDAGISelLegacy::ID = 0;
INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
+/// addBitcastHints - Adds bitcast hints to the operands and result of node N
+/// to help the instruction selector see which values live in Neon registers.
+static SDValue addBitcastHints(SelectionDAG &DAG, SDNode &N) {
+  SDLoc DL(&N);
+  auto getFloatVT = [](EVT VT) {
+    EVT ScalarVT = VT.getScalarType();
+    assert((ScalarVT == MVT::i32 || ScalarVT == MVT::i64) && "Unexpected VT");
+    return VT.changeElementType(ScalarVT == MVT::i32 ? MVT::f32 : MVT::f64);
+  };
+  auto bitcastToFloat = [&](SDValue Val) {
+    return DAG.getBitcast(getFloatVT(Val.getValueType()), Val);
+  };
+  SmallVector<SDValue, 2> NewOps;
+  NewOps.reserve(N.getNumOperands());
+  // Bitcast every operand to its FP-typed equivalent.
+  for (unsigned I = 0, E = N.getNumOperands(); I < E; ++I)
+    NewOps.push_back(bitcastToFloat(N.getOperand(I)));
+  EVT OrigVT = N.getValueType(0);
+  SDValue OpNode = DAG.getNode(N.getOpcode(), DL, getFloatVT(OrigVT), NewOps);
+  return DAG.getBitcast(OrigVT, OpNode);
+}
+
/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so Imm will receive the 32-bit value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
@@ -7774,3 +7797,43 @@ bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
return false;
}
+// Pre-isel DAG rewrite: wraps integer SCALAR_TO_VECTOR nodes in FP bitcasts.
+void AArch64DAGToDAGISel::PreprocessISelDAG() {
+  bool MadeChange = false;
+  for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
+    if (N.use_empty())
+      continue;
+    // Result stays null unless this node is rewritten below.
+    SDValue Result;
+    switch (N.getOpcode()) {
+    case ISD::SCALAR_TO_VECTOR: {
+      EVT VT = N.getValueType(0);
+      if (!VT.isVector() || VT.isScalableVector() || !VT.isInteger())
+        break;
+      if (VT.getVectorElementType() != N.getOperand(0).getValueType())
+        break;
+      // Fixed-width integer vector with a matching scalar element: hint it.
+      Result = addBitcastHints(*CurDAG, N);
+      break;
+    }
+    default:
+      break;
+    }
+    // A non-null Result means N was rewritten; splice the new node in.
+    if (Result) {
+      LLVM_DEBUG(dbgs() << "AArch64 DAG preprocessing replacing:\nOld: ");
+      LLVM_DEBUG(N.dump(CurDAG));
+      LLVM_DEBUG(dbgs() << "\nNew: ");
+      LLVM_DEBUG(Result.dump(CurDAG));
+      LLVM_DEBUG(dbgs() << "\n");
+
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(&N, 0), Result);
+      MadeChange = true;
+    }
+  }
+  // Drop nodes orphaned by the replacements above.
+  if (MadeChange)
+    CurDAG->RemoveDeadNodes();
+  // Let the target-independent preprocessing run as well.
+  SelectionDAGISel::PreprocessISelDAG();
+}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 5d9215dd71233..d9bd43ce70522 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -577,7 +577,7 @@ let Predicates = [HasRCPC3, HasNEON] in {
def : Pat<(vector_insert (v2f64 VecListOne128:$Rd),
(f64 (bitconvert (i64 (acquiring_load<atomic_load_nonext_64> GPR64sp:$Rn)))), (i64 VectorIndexD:$idx)),
(LDAP1 VecListOne128:$Rd, VectorIndexD:$idx, GPR64sp:$Rn)>;
- def : Pat<(v1i64 (scalar_to_vector
+ def : Pat<(v1i64 (scalar_to_vector_any_64
(i64 (acquiring_load<atomic_load_nonext_64> GPR64sp:$Rn)))),
(EXTRACT_SUBREG (LDAP1 (v2i64 (IMPLICIT_DEF)), (i64 0), GPR64sp:$Rn), dsub)>;
def : Pat<(v1f64 (scalar_to_vector
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 43319f7eb8a8f..3b41f373835dd 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -200,10 +200,27 @@ def dup_v4f32 :
[(v2f32 (extract_subvector (v4f32 (AArch64duplane32 (v4f32 node:$LHS), node:$RHS)), (i64 0))),
(v2f32 (AArch64duplane32 (v4f32 node:$LHS), node:$RHS))]>;
+// Match scalar_to_vector values, optionally wrapped in a bitcast through
+// f32/f64.
+def scalar_to_vector_any_64 : PatFrags<(ops node:$src),
+ [(scalar_to_vector node:$src),
+ (bitconvert (v2f32 (scalar_to_vector (f32 (bitconvert node:$src))))),
+ (bitconvert (v1f64 (scalar_to_vector (f64 (bitconvert node:$src)))))]>;
+def scalar_to_vector_any_128 : PatFrags<(ops node:$src),
+ [(scalar_to_vector node:$src),
+ (bitconvert (v4f32 (scalar_to_vector (f32 (bitconvert node:$src))))),
+ (bitconvert (v2f64 (scalar_to_vector (f64 (bitconvert node:$src)))))]>;
+
// Match either a scalar_to_vector (from SDAG) or a vector_insert of undef (from GISel)
def vec_ins_or_scal_vec : PatFrags<(ops node:$src),
[(vector_insert undef, node:$src, (i64 0)),
(scalar_to_vector node:$src)]>;
+def vec_ins_or_scal_vec_64 : PatFrags<(ops node:$src),
+ [(vector_insert undef, node:$src, (i64 0)),
+ (scalar_to_vector_any_64 node:$src)]>;
+def vec_ins_or_scal_vec_128 : PatFrags<(ops node:$src),
+ [(vector_insert undef, node:$src, (i64 0)),
+ (scalar_to_vector_any_128 node:$src)]>;
//===----------------------------------------------------------------------===//
// Asm Operand Classes.
@@ -8693,7 +8710,7 @@ multiclass SIMDScalarDUP<string asm> {
let Inst{19-16} = 0b1000;
}
- def : Pat<(v1i64 (scalar_to_vector (i64 (vector_extract (v2i64 V128:$src),
+ def : Pat<(v1i64 (scalar_to_vector_any_64 (i64 (vector_extract (v2i64 V128:$src),
VectorIndexD:$idx)))),
(!cast<Instruction>(NAME # i64) V128:$src, VectorIndexD:$idx)>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 6a0fe9b4619c6..8f1aa8dd3aba2 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4304,7 +4304,8 @@ multiclass LoadInsertVTPatterns<SDPatternOperator LoadOp, ValueType VT, ValueTyp
Instruction LoadInst, Instruction UnscaledLoadInst,
Instruction ROWLoadInst, Instruction ROXLoadInst,
ROAddrMode ro, ComplexPattern Addr, ComplexPattern UnscaledAddr,
- Operand AddrImm, SubRegIndex SubReg> {
+ Operand AddrImm, SubRegIndex SubReg,
+ SDPatternOperator VecInsFrag> {
// Scaled
def : Pat <(vector_insert (VT immAllZerosV),
(ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
@@ -4323,16 +4324,16 @@ multiclass LoadInsertVTPatterns<SDPatternOperator LoadOp, ValueType VT, ValueTyp
(SUBREG_TO_REG (i64 0), (ROXLoadInst GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), SubReg)>;
// Undef equivalents of the patterns above.
- def : Pat <(VT (vec_ins_or_scal_vec
+ def : Pat <(VT (VecInsFrag
(ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))))),
(SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
- def : Pat <(VT (vec_ins_or_scal_vec
+ def : Pat <(VT (VecInsFrag
(ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))))),
(SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
- def : Pat <(VT (vec_ins_or_scal_vec
+ def : Pat <(VT (VecInsFrag
(ScalarVT (LoadOp (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))))),
(SUBREG_TO_REG (i64 0), (ROWLoadInst GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), SubReg)>;
- def : Pat <(VT (vec_ins_or_scal_vec
+ def : Pat <(VT (VecInsFrag
(ScalarVT (LoadOp (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))))),
(SUBREG_TO_REG (i64 0), (ROXLoadInst GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), SubReg)>;
}
@@ -4343,11 +4344,11 @@ multiclass LoadInsertPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType
ROAddrMode ro, ComplexPattern Addr, ComplexPattern UnscaledAddr,
Operand AddrImm, SubRegIndex SubReg> {
defm : LoadInsertVTPatterns<LoadOp, VT, ScalarVT, LoadInst, UnscaledLoadInst, ROWLoadInst,
- ROXLoadInst, ro, Addr, UnscaledAddr, AddrImm, SubReg>;
+ ROXLoadInst, ro, Addr, UnscaledAddr, AddrImm, SubReg, vec_ins_or_scal_vec_128>;
defm : LoadInsertVTPatterns<LoadOp, HVT, ScalarVT, LoadInst, UnscaledLoadInst, ROWLoadInst,
- ROXLoadInst, ro, Addr, UnscaledAddr, AddrImm, SubReg>;
+ ROXLoadInst, ro, Addr, UnscaledAddr, AddrImm, SubReg, vec_ins_or_scal_vec_64>;
defm : LoadInsertVTPatterns<LoadOp, SVT, ScalarVT, LoadInst, UnscaledLoadInst, ROWLoadInst,
- ROXLoadInst, ro, Addr, UnscaledAddr, AddrImm, SubReg>;
+ ROXLoadInst, ro, Addr, UnscaledAddr, AddrImm, SubReg, vec_ins_or_scal_vec>;
}
// Accept i8 scalar argument in GlobalISel.
@@ -4381,16 +4382,16 @@ defm : LoadInsertPatterns<load, v2f64, isVoid, nxv2f64, f64,
// Extra patterns for v1f64 scalar_to_vector(load), which need to avoid the
// SUBREG_TO_REG used above.
-def : Pat <(v1i64 (scalar_to_vector (i64
+def : Pat <(v1i64 (scalar_to_vector_any_64 (i64
(load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
-def : Pat <(v1i64 (scalar_to_vector (i64
+def : Pat <(v1i64 (scalar_to_vector_any_64 (i64
(load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
-def : Pat <(v1i64 (scalar_to_vector (i64
+def : Pat <(v1i64 (scalar_to_vector_any_64 (i64
(load (ro64.Wpat GPR64sp:$Rn, GPR32:$Rm, ro64.Wext:$extend))))),
(LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro64.Wext:$extend)>;
-def : Pat <(v1i64 (scalar_to_vector (i64
+def : Pat <(v1i64 (scalar_to_vector_any_64 (i64
(load (ro64.Xpat GPR64sp:$Rn, GPR64:$Rm, ro64.Xext:$extend))))),
(LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro64.Xext:$extend)>;
@@ -4398,7 +4399,7 @@ def : Pat <(v1i64 (scalar_to_vector (i64
// Enables direct SIMD register loads for small integer types (i8/i16) that are
// naturally zero-extended to i32/i64.
multiclass ExtLoad8_16AllModes<ValueType OutTy, ValueType InnerTy,
- SDPatternOperator OuterOp,
+ PatFrags OuterOp,
PatFrags LoadOp8, PatFrags LoadOp16> {
// 8-bit loads.
def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
@@ -4423,7 +4424,7 @@ multiclass ExtLoad8_16AllModes<ValueType OutTy, ValueType InnerTy,
// Extended multiclass that includes 32-bit loads in addition to 8-bit and 16-bit.
multiclass ExtLoad8_16_32AllModes<ValueType OutTy, ValueType InnerTy,
- SDPatternOperator OuterOp,
+ PatFrags OuterOp,
PatFrags LoadOp8, PatFrags LoadOp16, PatFrags LoadOp32> {
defm : ExtLoad8_16AllModes<OutTy, InnerTy, OuterOp, LoadOp8, LoadOp16>;
@@ -4439,18 +4440,22 @@ multiclass ExtLoad8_16_32AllModes<ValueType OutTy, ValueType InnerTy,
}
// Instantiate bitconvert patterns for floating-point types.
-defm : ExtLoad8_16AllModes<f32, i32, bitconvert, zextloadi8, zextloadi16>;
-defm : ExtLoad8_16_32AllModes<f64, i64, bitconvert, zextloadi8, zextloadi16, zextloadi32>;
+
+// Wrap bitconvert in a PatFrags so it can be passed to the shared multiclass.
+def bitconvert_frag : PatFrags<(ops node:$src),
+ [(bitconvert node:$src)]>;
+defm : ExtLoad8_16AllModes<f32, i32, bitconvert_frag, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16_32AllModes<f64, i64, bitconvert_frag, zextloadi8, zextloadi16, zextloadi32>;
// Instantiate scalar_to_vector patterns for all vector types.
-defm : ExtLoad8_16AllModes<v16i8, i32, scalar_to_vector, zextloadi8, zextloadi16>;
-defm : ExtLoad8_16AllModes<v16i8, i32, scalar_to_vector, extloadi8, extloadi16>;
-defm : ExtLoad8_16AllModes<v8i16, i32, scalar_to_vector, zextloadi8, zextloadi16>;
-defm : ExtLoad8_16AllModes<v8i16, i32, scalar_to_vector, extloadi8, extloadi16>;
-defm : ExtLoad8_16AllModes<v4i32, i32, scalar_to_vector, zextloadi8, zextloadi16>;
-defm : ExtLoad8_16AllModes<v4i32, i32, scalar_to_vector, extloadi8, extloadi16>;
-defm : ExtLoad8_16_32AllModes<v2i64, i64, scalar_to_vector, zextloadi8, zextloadi16, zextloadi32>;
-defm : ExtLoad8_16_32AllModes<v2i64, i64, scalar_to_vector, extloadi8, extloadi16, extloadi32>;
+defm : ExtLoad8_16AllModes<v16i8, i32, scalar_to_vector_any_128, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16AllModes<v16i8, i32, scalar_to_vector_any_128, extloadi8, extloadi16>;
+defm : ExtLoad8_16AllModes<v8i16, i32, scalar_to_vector_any_128, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16AllModes<v8i16, i32, scalar_to_vector_any_128, extloadi8, extloadi16>;
+defm : ExtLoad8_16AllModes<v4i32, i32, scalar_to_vector_any_128, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16AllModes<v4i32, i32, scalar_to_vector_any_128, extloadi8, extloadi16>;
+defm : ExtLoad8_16_32AllModes<v2i64, i64, scalar_to_vector_any_128, zextloadi8, zextloadi16, zextloadi32>;
+defm : ExtLoad8_16_32AllModes<v2i64, i64, scalar_to_vector_any_128, extloadi8, extloadi16, extloadi32>;
// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
@@ -6569,38 +6574,6 @@ multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))),
(!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
- foreach ret_type = [v2i32, v4i32] in {
- let Predicates = [HasFPRCVT] in {
- def : Pat<(ret_type (scalar_to_vector (i32 (OpN (f64 FPR64:$Rn))))),
- (INSERT_SUBREG (IMPLICIT_DEF),
- (!cast<Instruction>(INST # SDr) FPR64:$Rn), ssub)>;
- def : Pat<(ret_type (scalar_to_vector (i32 (OpN (f16 FPR16:$Rn))))),
- (INSERT_SUBREG (IMPLICIT_DEF),
- (!cast<Instruction>(INST # SHr) FPR16:$Rn), ssub)>;
- }
- def : Pat<(ret_type (scalar_to_vector (i32 (OpN (f32 FPR32:$Rn))))),
- (INSERT_SUBREG (IMPLICIT_DEF),
- (!cast<Instruction>(INST # v1i32) FPR32:$Rn), ssub)>;
- }
-
- let Predicates = [HasFPRCVT] in {
- def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f16 FPR16:$Rn))))),
- (!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
- def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f32 FPR32:$Rn))))),
- (!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
- def : Pat<(v2i64 (scalar_to_vector (i64 (OpN (f16 FPR16:$Rn))))),
- (INSERT_SUBREG (IMPLICIT_DEF),
- (!cast<Instruction>(INST # DHr) FPR16:$Rn), dsub)>;
- def : Pat<(v2i64 (scalar_to_vector (i64 (OpN (f32 FPR32:$Rn))))),
- (INSERT_SUBREG (IMPLICIT_DEF),
- (!cast<Instruction>(INST # DSr) FPR32:$Rn), dsub)>;
- }
-
- def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f64 FPR64:$Rn))))),
- (!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
- def : Pat<(v2i64 (scalar_to_vector (i64 (OpN (f64 FPR64:$Rn))))),
- (INSERT_SUBREG (IMPLICIT_DEF),
- (!cast<Instruction>(INST # v1i64) FPR64:$Rn), dsub)>;
}
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtas, "FCVTAS">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtau, "FCVTAU">;
@@ -6649,39 +6622,6 @@ multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
def : Pat<(f64 (bitconvert (i64 (round f64:$Rn)))),
(!cast<Instruction>(INST # v1i64) $Rn)>;
- foreach ret_type = [v2i32, v4i32] in {
- let Predicates = [HasFPRCVT] in {
- def : Pat<(ret_type (scalar_to_vector (i32 (round f16:$Rn)))),
- (INSERT_SUBREG (IMPLICIT_DEF),
- (!cast<Instruction>(INST # SHr) $Rn), ssub)>;
- def : Pat<(ret_type (scalar_to_vector (i32 (round f64:$Rn)))),
- (INSERT_SUBREG (IMPLICIT_DEF),
- (!cast<Instruction>(INST # SDr) $Rn), ssub)>;
- }
- def : Pat<(ret_type (scalar_to_vector (i32 (round f32:$Rn)))),
- (INSERT_SUBREG (IMPLICIT_DEF),
- (!cast<Instruction>(INST # v1i32) $Rn), ssub)>;
- }
-
- let Predicates = [HasFPRCVT] in {
- def : Pat<(v1i64 (scalar_to_vector (i64 (round f16:$Rn)))),
- (!cast<Instruction>(INST # DHr) $Rn)>;
- def : Pat<(v1i64 (scalar_to_vector (i64 (round f32:$Rn)))),
- (!cast<Instruction>(INST # DSr) $Rn)>;
- def : Pat<(v2i64 (scalar_to_vector (i64 (round f16:$Rn)))),
- (INSERT_SUBREG (IMPLICIT_DEF),
- (!cast<Instruction>(INST # DHr) $Rn), dsub)>;
- def : Pat<(v2i64 (scalar_to_vector (i64 (round f32:$Rn)))),
- (INSERT_SUBREG (IMPLICIT_DEF),
- (!cast<Instruction>(INST # DSr) $Rn), dsub)>;
- }
-
- def : Pat<(v1i64 (scalar_to_vector (i64 (round f64:$Rn)))),
- (!cast<Instruction>(INST # v1i64) $Rn)>;
- def : Pat<(v2i64 (scalar_to_vector (i64 (round f64:$Rn)))),
- (INSERT_SUBREG (IMPLICIT_DEF),
- (!cast<Instruction>(INST # v1i64) $Rn), dsub)>;
-
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
@@ -8105,7 +8045,7 @@ multiclass Neon_INS_elt_ext_pattern<ValueType VT128, ValueType VT64, ValueType O
(INS (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$src, dsub), (VecIndexMult imm:$Immd),
(INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$Rn, dsub), imm:$Immn),
dsub)>;
- def : Pat<(OutVT (scalar_to_vector (i32 (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))))),
+ def : Pat<(OutVT (scalar_to_vector_any_64 (i32 (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))))),
(EXTRACT_SUBREG
(VT128 (SUBREG_TO_REG
(i64 0),
@@ -8121,7 +8061,7 @@ multiclass Neon_INS_elt_ext_pattern<ValueType VT128, ValueType VT64, ValueType O
(INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), (VecIndexMult imm:$Immd),
V128:$Rn, imm:$Immn),
dsub)>;
- def : Pat<(OutVT (scalar_to_vector (i32 (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))))),
+ def : Pat<(OutVT (scalar_to_vector_any_64 (i32 (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))))),
(EXTRACT_SUBREG
(VT128 (SUBREG_TO_REG
(i64 0),
@@ -9667,7 +9607,7 @@ def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexH
let Predicates = [HasNEON] in {
class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
SDPatternOperator ExtLoad, Instruction LD1>
- : Pat<(ResultTy (vec_ins_or_scal_vec (i32 (ExtLoad GPR64sp:$Rn)))),
+ : Pat<(ResultTy (vec_ins_or_scal_vec_64 (i32 (ExtLoad GPR64sp:$Rn)))),
(ResultTy (EXTRACT_SUBREG
(LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index ffb24dfbcd527..c7460b082301f 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3607,7 +3607,7 @@ let Predicates = [HasSVE_or_SME] in {
def : Pat<(v1f64 (scalar_to_vector
(f64 (vector_extract nxv2f64:$vec, VectorIndexD:$index)))),
(DUPi64 (EXTRACT_SUBREG nxv2f64:$vec, zsub), VectorIndexD:$index)>;
- def : Pat<(v1i64 (scalar_to_vector
+ def : Pat<(v1i64 (scalar_to_vector_any_64
(i64 (vector_extract nxv2i64:$vec, VectorIndexD:$index)))),
(DUPi64 (EXTRACT_SUBREG nxv2i64:$vec, zsub), VectorIndexD:$index)>;
} // End HasNEON
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index 99c540366fb12..d68186aee8a9e 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -1520,10 +1520,10 @@ for.end12: ; preds = %vector.body
define void @matrix_mul_signed_and(i32 %N, ptr nocapture %C, ptr nocapture readonly %A, i32 %val) {
; CHECK-SD-LABEL: matrix_mul_signed_and:
; CHECK-SD: // %bb.0: // %vector.header
-; CHECK-SD-NEXT: and w9, w3, #0xffff
+; CHECK-SD-NEXT: and w8, w3, #0xffff
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: and x8, x0, #0xfffffff8
-; CHECK-SD-NEXT: fmov s0, w9
; CHECK-SD-NEXT: .LBB13_1: // %vector.body
; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-SD-NEXT: add x9, x2, w0, uxtw #1
@@ -1608,10 +1608,10 @@ for.end12: ; preds = %vector.body
define void @matrix_mul_signed_and_double(i32 %N, ptr nocapture %C, ptr nocapture readonly %A, i32 %val) {
; CHECK-SD-LABEL: matrix_mul_signed_and_double:
; CHECK-SD: // %bb.0: // %vector.header
-; CHECK-SD-NEXT: and w9, w3, #0xffff
+; CHECK-SD-NEXT: and w8, w3, #0xffff
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: and x8, x0, #0xfffffff0
-; CHECK-SD-NEXT: fmov s0, w9
; CHECK-SD-NEXT: .LBB14_1: // %vector.body
; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-SD-NEXT: add x9, x2, w0, uxtw #1
diff --git a/llvm/test/CodeGen/AArch64/aarch64-pmull2.ll b/llvm/test/CodeGen/AArch64/aarch64-pmull2.ll
index 9d7aa78ec139f..b5cee616ee9dc 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-pmull2.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-pmull2.ll
@@ -9,9 +9,9 @@ define void @test1(ptr %0, ptr %1) {
; CHECK-LABEL: test1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #56824 // =0xddf8
-; CHECK-NEXT: mov w9, #61186 // =0xef02
+; CHECK-NEXT: mov x9, #61186 // =0xef02
; CHECK-NEXT: movk w8, #40522, lsl #16
-; CHECK-NEXT: movk w9, #29710, lsl #16
+; CHECK-NEXT: movk x9, #29710, lsl #16
; CHECK-NEXT: ldp q0, q1, [x1]
; CHECK-NEXT: dup v2.2d, x8
; CHECK-NEXT: fmov d3, x9
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
index 52c35ce872b61..1c93b7e67e8a2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
@@ -1734,383 +1734,3 @@ define double @fcvtzu_dd_simd(double %a) {
%bc = bitcast i64 %i to double
ret double %bc
}
-
-;
-; FPTOI scalar_to_vector
-;
-
-define <2 x i32> @fcvtzs_v2i32_from_f16_scalar_to_vector_simd(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i32_from_f16_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs w8, h0
-; CHECK-NOFPRCVT-NEXT: fmov s0, w8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzs_v2i32_from_f16_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs s0, h0
-; CHECK-NEXT: ret
- %fcvtzs_scalar = fptosi half %a to i32
- %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
- ret <2 x i32> %fcvtzs_vector
-}
-
-define <2 x i32> @fcvtzs_v2i32_from_f32_scalar_to_vector_simd(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i32_from_f32_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs s0, s0
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzs_v2i32_from_f32_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs s0, s0
-; CHECK-NEXT: ret
- %fcvtzs_scalar = fptosi float %a to i32
- %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
- ret <2 x i32> %fcvtzs_vector
-}
-
-define <2 x i32> @fcvtzs_v2i32_from_f64_scalar_to_vector_simd(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i32_from_f64_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs w8, d0
-; CHECK-NOFPRCVT-NEXT: fmov s0, w8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzs_v2i32_from_f64_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs s0, d0
-; CHECK-NEXT: ret
- %fcvtzs_scalar = fptosi double %a to i32
- %fcvtzs_vector = insertelement <2 x i32> poison, i32 %fcvtzs_scalar, i32 0
- ret <2 x i32> %fcvtzs_vector
-}
-
-define <4 x i32> @fcvtzs_v4i32_from_f16_scalar_to_vector_simd(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v4i32_from_f16_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs w8, h0
-; CHECK-NOFPRCVT-NEXT: fmov s0, w8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzs_v4i32_from_f16_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs s0, h0
-; CHECK-NEXT: ret
- %fcvtzs_scalar = fptosi half %a to i32
- %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
- ret <4 x i32> %fcvtzs_vector
-}
-
-define <4 x i32> @fcvtzs_v4i32_from_f32_scalar_to_vector_simd(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v4i32_from_f32_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs s0, s0
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzs_v4i32_from_f32_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs s0, s0
-; CHECK-NEXT: ret
- %fcvtzs_scalar = fptosi float %a to i32
- %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
- ret <4 x i32> %fcvtzs_vector
-}
-
-define <4 x i32> @fcvtzs_v4i32_from_f64_scalar_to_vector_simd(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v4i32_from_f64_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs w8, d0
-; CHECK-NOFPRCVT-NEXT: fmov s0, w8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzs_v4i32_from_f64_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs s0, d0
-; CHECK-NEXT: ret
- %fcvtzs_scalar = fptosi double %a to i32
- %fcvtzs_vector = insertelement <4 x i32> poison, i32 %fcvtzs_scalar, i32 0
- ret <4 x i32> %fcvtzs_vector
-}
-
-define <1 x i64> @fcvtzs_v1i64_from_f16_scalar_to_vector_simd(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v1i64_from_f16_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs x8, h0
-; CHECK-NOFPRCVT-NEXT: fmov d0, x8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzs_v1i64_from_f16_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs d0, h0
-; CHECK-NEXT: ret
- %fcvtzs_scalar = fptosi half %a to i64
- %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
- ret <1 x i64> %fcvtzs_vector
-}
-
-define <1 x i64> @fcvtzs_v1i64_from_f32_scalar_to_vector_simd(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v1i64_from_f32_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0
-; CHECK-NOFPRCVT-NEXT: fmov d0, x8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzs_v1i64_from_f32_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs d0, s0
-; CHECK-NEXT: ret
- %fcvtzs_scalar = fptosi float %a to i64
- %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
- ret <1 x i64> %fcvtzs_vector
-}
-
-define <1 x i64> @fcvtzs_v1i64_from_f64_scalar_to_vector_simd(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v1i64_from_f64_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzs_v1i64_from_f64_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs d0, d0
-; CHECK-NEXT: ret
- %fcvtzs_scalar = fptosi double %a to i64
- %fcvtzs_vector = insertelement <1 x i64> poison, i64 %fcvtzs_scalar, i32 0
- ret <1 x i64> %fcvtzs_vector
-}
-
-define <2 x i64> @fcvtzs_v2i64_from_f16_scalar_to_vector_simd(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i64_from_f16_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs x8, h0
-; CHECK-NOFPRCVT-NEXT: fmov d0, x8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzs_v2i64_from_f16_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs d0, h0
-; CHECK-NEXT: ret
- %fcvtzs_scalar = fptosi half %a to i64
- %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
- ret <2 x i64> %fcvtzs_vector
-}
-
-define <2 x i64> @fcvtzs_v2i64_from_f32_scalar_to_vector_simd(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i64_from_f32_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0
-; CHECK-NOFPRCVT-NEXT: fmov d0, x8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzs_v2i64_from_f32_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs d0, s0
-; CHECK-NEXT: ret
- %fcvtzs_scalar = fptosi float %a to i64
- %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
- ret <2 x i64> %fcvtzs_vector
-}
-
-define <2 x i64> @fcvtzs_v2i64_from_f64_scalar_to_vector_simd(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzs_v2i64_from_f64_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzs_v2i64_from_f64_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs d0, d0
-; CHECK-NEXT: ret
- %fcvtzs_scalar = fptosi double %a to i64
- %fcvtzs_vector = insertelement <2 x i64> poison, i64 %fcvtzs_scalar, i32 0
- ret <2 x i64> %fcvtzs_vector
-}
-
-define <2 x i32> @fcvtzu_v2i32_from_f16_scalar_to_vector_simd(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i32_from_f16_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu w8, h0
-; CHECK-NOFPRCVT-NEXT: fmov s0, w8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzu_v2i32_from_f16_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu s0, h0
-; CHECK-NEXT: ret
- %fcvtzu_scalar = fptoui half %a to i32
- %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
- ret <2 x i32> %fcvtzu_vector
-}
-
-define <2 x i32> @fcvtzu_v2i32_from_f32_scalar_to_vector_simd(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i32_from_f32_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu s0, s0
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzu_v2i32_from_f32_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu s0, s0
-; CHECK-NEXT: ret
- %fcvtzu_scalar = fptoui float %a to i32
- %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
- ret <2 x i32> %fcvtzu_vector
-}
-
-define <2 x i32> @fcvtzu_v2i32_from_f64_scalar_to_vector_simd(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i32_from_f64_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu w8, d0
-; CHECK-NOFPRCVT-NEXT: fmov s0, w8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzu_v2i32_from_f64_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu s0, d0
-; CHECK-NEXT: ret
- %fcvtzu_scalar = fptoui double %a to i32
- %fcvtzu_vector = insertelement <2 x i32> poison, i32 %fcvtzu_scalar, i32 0
- ret <2 x i32> %fcvtzu_vector
-}
-
-define <4 x i32> @fcvtzu_v4i32_from_f16_scalar_to_vector_simd(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v4i32_from_f16_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu w8, h0
-; CHECK-NOFPRCVT-NEXT: fmov s0, w8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzu_v4i32_from_f16_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu s0, h0
-; CHECK-NEXT: ret
- %fcvtzu_scalar = fptoui half %a to i32
- %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
- ret <4 x i32> %fcvtzu_vector
-}
-
-define <4 x i32> @fcvtzu_v4i32_from_f32_scalar_to_vector_simd(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v4i32_from_f32_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu s0, s0
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzu_v4i32_from_f32_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu s0, s0
-; CHECK-NEXT: ret
- %fcvtzu_scalar = fptoui float %a to i32
- %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
- ret <4 x i32> %fcvtzu_vector
-}
-
-define <4 x i32> @fcvtzu_v4i32_from_f64_scalar_to_vector_simd(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v4i32_from_f64_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu w8, d0
-; CHECK-NOFPRCVT-NEXT: fmov s0, w8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzu_v4i32_from_f64_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu s0, d0
-; CHECK-NEXT: ret
- %fcvtzu_scalar = fptoui double %a to i32
- %fcvtzu_vector = insertelement <4 x i32> poison, i32 %fcvtzu_scalar, i32 0
- ret <4 x i32> %fcvtzu_vector
-}
-
-define <1 x i64> @fcvtzu_v1i64_from_f16_scalar_to_vector_simd(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v1i64_from_f16_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu x8, h0
-; CHECK-NOFPRCVT-NEXT: fmov d0, x8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzu_v1i64_from_f16_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu d0, h0
-; CHECK-NEXT: ret
- %fcvtzu_scalar = fptoui half %a to i64
- %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
- ret <1 x i64> %fcvtzu_vector
-}
-
-define <1 x i64> @fcvtzu_v1i64_from_f32_scalar_to_vector_simd(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v1i64_from_f32_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0
-; CHECK-NOFPRCVT-NEXT: fmov d0, x8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzu_v1i64_from_f32_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu d0, s0
-; CHECK-NEXT: ret
- %fcvtzu_scalar = fptoui float %a to i64
- %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
- ret <1 x i64> %fcvtzu_vector
-}
-
-define <1 x i64> @fcvtzu_v1i64_from_f64_scalar_to_vector_simd(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v1i64_from_f64_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu d0, d0
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzu_v1i64_from_f64_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu d0, d0
-; CHECK-NEXT: ret
- %fcvtzu_scalar = fptoui double %a to i64
- %fcvtzu_vector = insertelement <1 x i64> poison, i64 %fcvtzu_scalar, i32 0
- ret <1 x i64> %fcvtzu_vector
-}
-
-define <2 x i64> @fcvtzu_v2i64_from_f16_scalar_to_vector_simd(half %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i64_from_f16_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu x8, h0
-; CHECK-NOFPRCVT-NEXT: fmov d0, x8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzu_v2i64_from_f16_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu d0, h0
-; CHECK-NEXT: ret
- %fcvtzu_scalar = fptoui half %a to i64
- %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
- ret <2 x i64> %fcvtzu_vector
-}
-
-define <2 x i64> @fcvtzu_v2i64_from_f32_scalar_to_vector_simd(float %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i64_from_f32_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0
-; CHECK-NOFPRCVT-NEXT: fmov d0, x8
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzu_v2i64_from_f32_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu d0, s0
-; CHECK-NEXT: ret
- %fcvtzu_scalar = fptoui float %a to i64
- %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
- ret <2 x i64> %fcvtzu_vector
-}
-
-define <2 x i64> @fcvtzu_v2i64_from_f64_scalar_to_vector_simd(double %a) {
-; CHECK-NOFPRCVT-LABEL: fcvtzu_v2i64_from_f64_scalar_to_vector_simd:
-; CHECK-NOFPRCVT: // %bb.0:
-; CHECK-NOFPRCVT-NEXT: fcvtzu d0, d0
-; CHECK-NOFPRCVT-NEXT: ret
-;
-; CHECK-LABEL: fcvtzu_v2i64_from_f64_scalar_to_vector_simd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu d0, d0
-; CHECK-NEXT: ret
- %fcvtzu_scalar = fptoui double %a to i64
- %fcvtzu_vector = insertelement <2 x i64> poison, i64 %fcvtzu_scalar, i32 0
- ret <2 x i64> %fcvtzu_vector
-}
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll b/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll
index cad3fb58086d6..b72fbe0a91684 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll
@@ -114,9 +114,9 @@ define <8x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8x i16> %c, <8x i16> %d )
define <2x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2x i32> %c, <2x i32> %d ) {
; CHECK-LABEL: test_select_cc_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s2, w1
-; CHECK-NEXT: fmov s3, w0
-; CHECK-NEXT: cmeq v2.2s, v3.2s, v2.2s
+; CHECK-NEXT: fmov s2, w0
+; CHECK-NEXT: fmov s3, w1
+; CHECK-NEXT: cmeq v2.2s, v2.2s, v3.2s
; CHECK-NEXT: dup v2.2s, v2.s[0]
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
@@ -128,9 +128,9 @@ define <2x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2x i32> %c, <2x i32> %d )
define <4x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4x i32> %c, <4x i32> %d ) {
; CHECK-LABEL: test_select_cc_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s2, w1
-; CHECK-NEXT: fmov s3, w0
-; CHECK-NEXT: cmeq v2.4s, v3.4s, v2.4s
+; CHECK-NEXT: fmov s2, w0
+; CHECK-NEXT: fmov s3, w1
+; CHECK-NEXT: cmeq v2.4s, v2.4s, v3.4s
; CHECK-NEXT: dup v2.4s, v2.s[0]
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
@@ -155,9 +155,9 @@ define <1x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1x i64> %c, <1x i64> %d )
define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d ) {
; CHECK-LABEL: test_select_cc_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d2, x1
-; CHECK-NEXT: fmov d3, x0
-; CHECK-NEXT: cmeq v2.2d, v3.2d, v2.2d
+; CHECK-NEXT: fmov d2, x0
+; CHECK-NEXT: fmov d3, x1
+; CHECK-NEXT: cmeq v2.2d, v2.2d, v3.2d
; CHECK-NEXT: dup v2.2d, v2.d[0]
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
@@ -210,9 +210,9 @@ define <4x float> @test_select_cc_v4f32(float %a, float %b, <4x float> %c, <4x f
define <4x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4x float> %c, <4x float> %d ) {
; CHECK-LABEL: test_select_cc_v4f32_icmp:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s2, w1
-; CHECK-NEXT: fmov s3, w0
-; CHECK-NEXT: cmeq v2.4s, v3.4s, v2.4s
+; CHECK-NEXT: fmov s2, w0
+; CHECK-NEXT: fmov s3, w1
+; CHECK-NEXT: cmeq v2.4s, v2.4s, v3.4s
; CHECK-NEXT: dup v2.4s, v2.s[0]
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
index cb14adc00df00..2b567d8f148f7 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
@@ -405,15 +405,25 @@ define i16 @test_sqrdmlah_v1i16(i16 %acc, i16 %x, i16 %y) {
}
define i32 @test_sqrdmlah_v1i32(i32 %acc, i32 %x, i32 %y) {
-; CHECK-LABEL: test_sqrdmlah_v1i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s0, w1
-; CHECK-NEXT: fmov s1, w2
-; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: fmov s1, w0
-; CHECK-NEXT: sqadd v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_sqrdmlah_v1i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmov s0, w2
+; CHECK-SD-NEXT: fmov s1, w1
+; CHECK-SD-NEXT: sqrdmulh v0.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT: fmov s1, w0
+; CHECK-SD-NEXT: sqadd v0.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_sqrdmlah_v1i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov s0, w1
+; CHECK-GI-NEXT: fmov s1, w2
+; CHECK-GI-NEXT: sqrdmulh v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: fmov s1, w0
+; CHECK-GI-NEXT: sqadd v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
%x_vec = insertelement <4 x i32> undef, i32 %x, i64 0
%y_vec = insertelement <4 x i32> undef, i32 %y, i64 0
%prod_vec = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x_vec, <4 x i32> %y_vec)
@@ -444,15 +454,25 @@ define i16 @test_sqrdmlsh_v1i16(i16 %acc, i16 %x, i16 %y) {
}
define i32 @test_sqrdmlsh_v1i32(i32 %acc, i32 %x, i32 %y) {
-; CHECK-LABEL: test_sqrdmlsh_v1i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s0, w1
-; CHECK-NEXT: fmov s1, w2
-; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: fmov s1, w0
-; CHECK-NEXT: sqsub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_sqrdmlsh_v1i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmov s0, w2
+; CHECK-SD-NEXT: fmov s1, w1
+; CHECK-SD-NEXT: sqrdmulh v0.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT: fmov s1, w0
+; CHECK-SD-NEXT: sqsub v0.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_sqrdmlsh_v1i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov s0, w1
+; CHECK-GI-NEXT: fmov s1, w2
+; CHECK-GI-NEXT: sqrdmulh v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: fmov s1, w0
+; CHECK-GI-NEXT: sqsub v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
%x_vec = insertelement <4 x i32> undef, i32 %x, i64 0
%y_vec = insertelement <4 x i32> undef, i32 %y, i64 0
%prod_vec = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x_vec, <4 x i32> %y_vec)
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
index 9743639d99d9b..360183b5006b6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -2555,8 +2555,7 @@ define <1 x i64> @neon_ushl_vscalar_constant_shift(ptr %A) nounwind {
define i64 @neon_ushl_scalar_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_ushl_scalar_constant_shift:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr w8, [x0]
-; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: ldr s0, [x0]
; CHECK-SD-NEXT: shl d0, d0, #1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
@@ -2848,8 +2847,7 @@ define <1 x i64> @neon_sshll_vscalar_constant_shift(ptr %A) nounwind {
define i64 @neon_sshll_scalar_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshll_scalar_constant_shift:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr w8, [x0]
-; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: ldr s0, [x0]
; CHECK-SD-NEXT: shl d0, d0, #1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
@@ -2872,8 +2870,7 @@ define i64 @neon_sshll_scalar_constant_shift(ptr %A) nounwind {
define i64 @neon_sshll_scalar_constant_shift_m1(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshll_scalar_constant_shift_m1:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr w8, [x0]
-; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: ldr s0, [x0]
; CHECK-SD-NEXT: sshr d0, d0, #1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/avoid-pre-trunc.ll b/llvm/test/CodeGen/AArch64/avoid-pre-trunc.ll
index d7a2a83cf3660..bd9162e36f299 100644
--- a/llvm/test/CodeGen/AArch64/avoid-pre-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/avoid-pre-trunc.ll
@@ -107,20 +107,20 @@ define <4 x i32> @lower_trunc_4xi32(i64 %a, i64 %b, i64 %c, i64 %d) {
define <8 x i32> @lower_trunc_8xi32(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h) {
; CHECK-LABEL: lower_trunc_8xi32:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d0, x6
-; CHECK-NEXT: fmov d1, x4
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: fmov d1, x6
; CHECK-NEXT: fmov d2, x2
-; CHECK-NEXT: fmov d3, x0
-; CHECK-NEXT: mov v0.d[1], x7
-; CHECK-NEXT: mov v1.d[1], x5
+; CHECK-NEXT: fmov d3, x4
+; CHECK-NEXT: mov v1.d[1], x7
; CHECK-NEXT: mov v2.d[1], x3
-; CHECK-NEXT: mov v3.d[1], x1
-; CHECK-NEXT: uzp1 v1.4s, v1.4s, v0.4s
-; CHECK-NEXT: uzp1 v2.4s, v3.4s, v2.4s
-; CHECK-NEXT: add v3.4s, v1.4s, v1.4s
-; CHECK-NEXT: add v0.4s, v2.4s, v2.4s
-; CHECK-NEXT: eor v1.16b, v1.16b, v3.16b
-; CHECK-NEXT: eor v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: mov v0.d[1], x1
+; CHECK-NEXT: mov v3.d[1], x5
+; CHECK-NEXT: uzp1 v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: uzp1 v1.4s, v3.4s, v1.4s
+; CHECK-NEXT: add v3.4s, v0.4s, v0.4s
+; CHECK-NEXT: add v2.4s, v1.4s, v1.4s
+; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b
+; CHECK-NEXT: eor v1.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%a1 = insertelement <8 x i64> poison, i64 %a, i64 0
%b1 = insertelement <8 x i64> %a1, i64 %b, i64 1
diff --git a/llvm/test/CodeGen/AArch64/concat-vector-add-combine.ll b/llvm/test/CodeGen/AArch64/concat-vector-add-combine.ll
index 545da98034527..171f74149c905 100644
--- a/llvm/test/CodeGen/AArch64/concat-vector-add-combine.ll
+++ b/llvm/test/CodeGen/AArch64/concat-vector-add-combine.ll
@@ -60,16 +60,16 @@ define i16 @combine_add_16xi16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i
define i32 @combine_add_8xi32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) local_unnamed_addr #0 {
; CHECK-LABEL: combine_add_8xi32:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s0, w4
-; CHECK-NEXT: fmov s1, w0
-; CHECK-NEXT: mov v0.s[1], w5
-; CHECK-NEXT: mov v1.s[1], w1
-; CHECK-NEXT: mov v0.s[2], w6
-; CHECK-NEXT: mov v1.s[2], w2
-; CHECK-NEXT: mov v0.s[3], w7
-; CHECK-NEXT: mov v1.s[3], w3
-; CHECK-NEXT: uzp2 v2.8h, v1.8h, v0.8h
-; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: fmov s1, w4
+; CHECK-NEXT: mov v1.s[1], w5
+; CHECK-NEXT: mov v0.s[1], w1
+; CHECK-NEXT: mov v1.s[2], w6
+; CHECK-NEXT: mov v0.s[2], w2
+; CHECK-NEXT: mov v1.s[3], w7
+; CHECK-NEXT: mov v0.s[3], w3
+; CHECK-NEXT: uzp2 v2.8h, v0.8h, v1.8h
+; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT: uhadd v0.8h, v0.8h, v2.8h
; CHECK-NEXT: uaddlv s0, v0.8h
; CHECK-NEXT: fmov w0, s0
diff --git a/llvm/test/CodeGen/AArch64/ctpop.ll b/llvm/test/CodeGen/AArch64/ctpop.ll
index 9c59f1b233b5d..b7dfb79477b64 100644
--- a/llvm/test/CodeGen/AArch64/ctpop.ll
+++ b/llvm/test/CodeGen/AArch64/ctpop.ll
@@ -395,24 +395,24 @@ entry:
define <3 x i128> @v3i128(<3 x i128> %d) {
; CHECK-SD-LABEL: v3i128:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov d0, x4
+; CHECK-SD-NEXT: fmov d0, x0
; CHECK-SD-NEXT: fmov d1, x2
-; CHECK-SD-NEXT: fmov d2, x0
-; CHECK-SD-NEXT: mov v0.d[1], x5
+; CHECK-SD-NEXT: fmov d2, x4
+; CHECK-SD-NEXT: mov v2.d[1], x5
; CHECK-SD-NEXT: mov v1.d[1], x3
-; CHECK-SD-NEXT: mov v2.d[1], x1
+; CHECK-SD-NEXT: mov v0.d[1], x1
; CHECK-SD-NEXT: mov x1, xzr
; CHECK-SD-NEXT: mov x3, xzr
; CHECK-SD-NEXT: mov x5, xzr
-; CHECK-SD-NEXT: cnt v0.16b, v0.16b
-; CHECK-SD-NEXT: cnt v1.16b, v1.16b
; CHECK-SD-NEXT: cnt v2.16b, v2.16b
-; CHECK-SD-NEXT: addv b0, v0.16b
-; CHECK-SD-NEXT: addv b1, v1.16b
+; CHECK-SD-NEXT: cnt v1.16b, v1.16b
+; CHECK-SD-NEXT: cnt v0.16b, v0.16b
; CHECK-SD-NEXT: addv b2, v2.16b
-; CHECK-SD-NEXT: fmov x0, d2
+; CHECK-SD-NEXT: addv b1, v1.16b
+; CHECK-SD-NEXT: addv b0, v0.16b
+; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: fmov x2, d1
-; CHECK-SD-NEXT: fmov x4, d0
+; CHECK-SD-NEXT: fmov x4, d2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v3i128:
@@ -444,30 +444,30 @@ entry:
define <4 x i128> @v4i128(<4 x i128> %d) {
; CHECK-SD-LABEL: v4i128:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov d0, x6
-; CHECK-SD-NEXT: fmov d1, x4
-; CHECK-SD-NEXT: fmov d2, x2
-; CHECK-SD-NEXT: fmov d3, x0
-; CHECK-SD-NEXT: mov v1.d[1], x5
-; CHECK-SD-NEXT: mov v2.d[1], x3
-; CHECK-SD-NEXT: mov v0.d[1], x7
-; CHECK-SD-NEXT: mov v3.d[1], x1
+; CHECK-SD-NEXT: fmov d0, x0
+; CHECK-SD-NEXT: fmov d1, x2
+; CHECK-SD-NEXT: fmov d2, x4
+; CHECK-SD-NEXT: fmov d3, x6
+; CHECK-SD-NEXT: mov v2.d[1], x5
+; CHECK-SD-NEXT: mov v1.d[1], x3
+; CHECK-SD-NEXT: mov v0.d[1], x1
+; CHECK-SD-NEXT: mov v3.d[1], x7
; CHECK-SD-NEXT: mov x1, xzr
; CHECK-SD-NEXT: mov x3, xzr
; CHECK-SD-NEXT: mov x5, xzr
; CHECK-SD-NEXT: mov x7, xzr
-; CHECK-SD-NEXT: cnt v1.16b, v1.16b
; CHECK-SD-NEXT: cnt v2.16b, v2.16b
+; CHECK-SD-NEXT: cnt v1.16b, v1.16b
; CHECK-SD-NEXT: cnt v0.16b, v0.16b
; CHECK-SD-NEXT: cnt v3.16b, v3.16b
-; CHECK-SD-NEXT: addv b1, v1.16b
; CHECK-SD-NEXT: addv b2, v2.16b
+; CHECK-SD-NEXT: addv b1, v1.16b
; CHECK-SD-NEXT: addv b0, v0.16b
; CHECK-SD-NEXT: addv b3, v3.16b
-; CHECK-SD-NEXT: fmov x2, d2
-; CHECK-SD-NEXT: fmov x4, d1
-; CHECK-SD-NEXT: fmov x6, d0
-; CHECK-SD-NEXT: fmov x0, d3
+; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: fmov x2, d1
+; CHECK-SD-NEXT: fmov x4, d2
+; CHECK-SD-NEXT: fmov x6, d3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v4i128:
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index 2417205759767..05e4b414dc072 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -1161,24 +1161,17 @@ declare <7 x i32> @llvm.fptosi.sat.v7f16.v7i32 (<7 x half>)
declare <8 x i32> @llvm.fptosi.sat.v8f16.v8i32 (<8 x half>)
define <1 x i32> @test_signed_v1f16_v1i32(<1 x half> %f) {
-; CHECK-SD-CVT-LABEL: test_signed_v1f16_v1i32:
-; CHECK-SD-CVT: // %bb.0:
-; CHECK-SD-CVT-NEXT: fcvt s0, h0
-; CHECK-SD-CVT-NEXT: fcvtzs w8, s0
-; CHECK-SD-CVT-NEXT: fmov s0, w8
-; CHECK-SD-CVT-NEXT: ret
+; CHECK-CVT-LABEL: test_signed_v1f16_v1i32:
+; CHECK-CVT: // %bb.0:
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: fcvtzs s0, s0
+; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_signed_v1f16_v1i32:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcvtzs w8, h0
; CHECK-FP16-NEXT: fmov s0, w8
; CHECK-FP16-NEXT: ret
-;
-; CHECK-GI-CVT-LABEL: test_signed_v1f16_v1i32:
-; CHECK-GI-CVT: // %bb.0:
-; CHECK-GI-CVT-NEXT: fcvt s0, h0
-; CHECK-GI-CVT-NEXT: fcvtzs s0, s0
-; CHECK-GI-CVT-NEXT: ret
%x = call <1 x i32> @llvm.fptosi.sat.v1f16.v1i32(<1 x half> %f)
ret <1 x i32> %x
}
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index ecca1165753bf..e461ace0ea0a2 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -992,24 +992,17 @@ declare <7 x i32> @llvm.fptoui.sat.v7f16.v7i32 (<7 x half>)
declare <8 x i32> @llvm.fptoui.sat.v8f16.v8i32 (<8 x half>)
define <1 x i32> @test_unsigned_v1f16_v1i32(<1 x half> %f) {
-; CHECK-SD-CVT-LABEL: test_unsigned_v1f16_v1i32:
-; CHECK-SD-CVT: // %bb.0:
-; CHECK-SD-CVT-NEXT: fcvt s0, h0
-; CHECK-SD-CVT-NEXT: fcvtzu w8, s0
-; CHECK-SD-CVT-NEXT: fmov s0, w8
-; CHECK-SD-CVT-NEXT: ret
+; CHECK-CVT-LABEL: test_unsigned_v1f16_v1i32:
+; CHECK-CVT: // %bb.0:
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: fcvtzu s0, s0
+; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_unsigned_v1f16_v1i32:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcvtzu w8, h0
; CHECK-FP16-NEXT: fmov s0, w8
; CHECK-FP16-NEXT: ret
-;
-; CHECK-GI-CVT-LABEL: test_unsigned_v1f16_v1i32:
-; CHECK-GI-CVT: // %bb.0:
-; CHECK-GI-CVT-NEXT: fcvt s0, h0
-; CHECK-GI-CVT-NEXT: fcvtzu s0, s0
-; CHECK-GI-CVT-NEXT: ret
%x = call <1 x i32> @llvm.fptoui.sat.v1f16.v1i32(<1 x half> %f)
ret <1 x i32> %x
}
diff --git a/llvm/test/CodeGen/AArch64/fsh.ll b/llvm/test/CodeGen/AArch64/fsh.ll
index 1db776ea6f616..3ccbdb6b9d30a 100644
--- a/llvm/test/CodeGen/AArch64/fsh.ll
+++ b/llvm/test/CodeGen/AArch64/fsh.ll
@@ -2611,25 +2611,25 @@ define <7 x i32> @fshr_v7i32(<7 x i32> %a, <7 x i32> %b, <7 x i32> %c) {
; CHECK-SD-NEXT: mov v3.s[2], w6
; CHECK-SD-NEXT: ld1 { v4.s }[2], [x9]
; CHECK-SD-NEXT: ld1 { v6.s }[1], [x8]
-; CHECK-SD-NEXT: bic v16.16b, v5.16b, v2.16b
-; CHECK-SD-NEXT: and v2.16b, v2.16b, v5.16b
+; CHECK-SD-NEXT: and v16.16b, v2.16b, v5.16b
; CHECK-SD-NEXT: add x8, sp, #40
; CHECK-SD-NEXT: add x9, sp, #16
+; CHECK-SD-NEXT: bic v2.16b, v5.16b, v2.16b
; CHECK-SD-NEXT: mov v1.s[3], w3
; CHECK-SD-NEXT: and v7.16b, v0.16b, v5.16b
; CHECK-SD-NEXT: bic v0.16b, v5.16b, v0.16b
; CHECK-SD-NEXT: ld1 { v4.s }[3], [x9]
; CHECK-SD-NEXT: ld1 { v6.s }[2], [x8]
; CHECK-SD-NEXT: add v3.4s, v3.4s, v3.4s
-; CHECK-SD-NEXT: neg v2.4s, v2.4s
; CHECK-SD-NEXT: neg v5.4s, v7.4s
+; CHECK-SD-NEXT: neg v7.4s, v16.4s
; CHECK-SD-NEXT: add v1.4s, v1.4s, v1.4s
-; CHECK-SD-NEXT: ushl v3.4s, v3.4s, v16.4s
-; CHECK-SD-NEXT: ushl v2.4s, v6.4s, v2.4s
+; CHECK-SD-NEXT: ushl v4.4s, v4.4s, v5.4s
; CHECK-SD-NEXT: ushl v0.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT: ushl v1.4s, v4.4s, v5.4s
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT: orr v1.16b, v3.16b, v2.16b
+; CHECK-SD-NEXT: ushl v1.4s, v3.4s, v2.4s
+; CHECK-SD-NEXT: ushl v2.4s, v6.4s, v7.4s
+; CHECK-SD-NEXT: orr v0.16b, v0.16b, v4.16b
+; CHECK-SD-NEXT: orr v1.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: mov w1, v0.s[1]
; CHECK-SD-NEXT: mov w2, v0.s[2]
; CHECK-SD-NEXT: mov w3, v0.s[3]
diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
index ae71cd00b9aa4..b94a26fd0e80b 100644
--- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
+++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
@@ -25,7 +25,7 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
; CHECK-NEXT: .cfi_offset b13, -64
; CHECK-NEXT: .cfi_offset b14, -72
; CHECK-NEXT: .cfi_offset b15, -80
-; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: movi v7.2d, #0000000000000000
; CHECK-NEXT: adrp x14, B+48
; CHECK-NEXT: add x14, x14, :lo12:B+48
; CHECK-NEXT: // implicit-def: $q18
@@ -43,15 +43,15 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
; CHECK-NEXT: // implicit-def: $q3
; CHECK-NEXT: // implicit-def: $q4
; CHECK-NEXT: // implicit-def: $q5
-; CHECK-NEXT: // implicit-def: $q6
+; CHECK-NEXT: // implicit-def: $q1
; CHECK-NEXT: // implicit-def: $q16
; CHECK-NEXT: // implicit-def: $q17
-; CHECK-NEXT: // implicit-def: $q7
+; CHECK-NEXT: // implicit-def: $q6
; CHECK-NEXT: // implicit-def: $q19
; CHECK-NEXT: // implicit-def: $q20
; CHECK-NEXT: // implicit-def: $q21
; CHECK-NEXT: // implicit-def: $q22
-; CHECK-NEXT: // implicit-def: $q24
+; CHECK-NEXT: // implicit-def: $q12
; CHECK-NEXT: // implicit-def: $q23
; CHECK-NEXT: // implicit-def: $q25
; CHECK-NEXT: // implicit-def: $q26
@@ -59,7 +59,7 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
; CHECK-NEXT: // implicit-def: $q30
; CHECK-NEXT: // implicit-def: $q8
; CHECK-NEXT: // implicit-def: $q11
-; CHECK-NEXT: // implicit-def: $q12
+; CHECK-NEXT: // implicit-def: $q28
; CHECK-NEXT: // implicit-def: $q29
; CHECK-NEXT: // implicit-def: $q13
; CHECK-NEXT: // implicit-def: $q10
@@ -69,111 +69,107 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
; CHECK-NEXT: // kill: killed $q18
; CHECK-NEXT: .LBB0_1: // %for.cond1.preheader
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr x17, [x8]
+; CHECK-NEXT: ldr x18, [x8]
; CHECK-NEXT: ldr x15, [x8]
; CHECK-NEXT: mov v18.16b, v0.16b
; CHECK-NEXT: ldr x16, [x9]
; CHECK-NEXT: stp q15, q4, [sp] // 32-byte Folded Spill
; CHECK-NEXT: add x5, x10, x11
-; CHECK-NEXT: mul x1, x15, x17
+; CHECK-NEXT: mul x1, x15, x18
; CHECK-NEXT: ldr x2, [x13], #64
-; CHECK-NEXT: ldr x5, [x5, #128]
-; CHECK-NEXT: stp q7, q23, [sp, #32] // 32-byte Folded Spill
-; CHECK-NEXT: ldr x14, [x14, #8]
-; CHECK-NEXT: mul x0, x17, x17
+; CHECK-NEXT: stp q6, q23, [sp, #32] // 32-byte Folded Spill
; CHECK-NEXT: ldr q23, [sp, #80] // 16-byte Reload
+; CHECK-NEXT: ldr x14, [x14, #8]
+; CHECK-NEXT: mul x0, x18, x18
+; CHECK-NEXT: ldr x5, [x5, #128]
; CHECK-NEXT: mov v9.16b, v30.16b
; CHECK-NEXT: mov v30.16b, v25.16b
; CHECK-NEXT: mov v25.16b, v20.16b
-; CHECK-NEXT: mov v20.16b, v6.16b
-; CHECK-NEXT: mul x18, x16, x17
-; CHECK-NEXT: mov v6.16b, v1.16b
-; CHECK-NEXT: mov v28.16b, v24.16b
-; CHECK-NEXT: fmov d14, x1
-; CHECK-NEXT: mov v24.16b, v19.16b
-; CHECK-NEXT: mov v19.16b, v5.16b
-; CHECK-NEXT: mul x4, x2, x17
+; CHECK-NEXT: mov v20.16b, v1.16b
+; CHECK-NEXT: mul x17, x16, x18
; CHECK-NEXT: mov v31.16b, v26.16b
; CHECK-NEXT: mov v26.16b, v21.16b
-; CHECK-NEXT: fmov d15, x0
+; CHECK-NEXT: fmov d14, x1
; CHECK-NEXT: mov v21.16b, v16.16b
; CHECK-NEXT: mov v16.16b, v2.16b
-; CHECK-NEXT: mov v0.16b, v14.16b
-; CHECK-NEXT: mul x20, x2, x5
-; CHECK-NEXT: mov v7.16b, v10.16b
+; CHECK-NEXT: mul x4, x2, x18
+; CHECK-NEXT: mov v6.16b, v10.16b
; CHECK-NEXT: mov v10.16b, v17.16b
+; CHECK-NEXT: fmov d15, x0
; CHECK-NEXT: mov v17.16b, v3.16b
+; CHECK-NEXT: mov v24.16b, v19.16b
+; CHECK-NEXT: mov v0.16b, v14.16b
+; CHECK-NEXT: mul x3, x14, x18
+; CHECK-NEXT: mov v19.16b, v5.16b
; CHECK-NEXT: add x11, x11, #8
-; CHECK-NEXT: mov v15.d[1], x18
-; CHECK-NEXT: mul x3, x14, x17
+; CHECK-NEXT: add x12, x12, #1
+; CHECK-NEXT: mov v15.d[1], x17
+; CHECK-NEXT: mul x6, x15, x15
; CHECK-NEXT: cmp x11, #64
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov d1, x4
-; CHECK-NEXT: add x12, x12, #1
-; CHECK-NEXT: mul x17, x17, x5
-; CHECK-NEXT: fmov d5, x20
-; CHECK-NEXT: mul x6, x15, x15
+; CHECK-NEXT: mul x7, x15, x5
+; CHECK-NEXT: mul x18, x18, x5
+; CHECK-NEXT: mov v1.d[1], x3
; CHECK-NEXT: add v23.2d, v23.2d, v0.2d
; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Reload
-; CHECK-NEXT: mov v1.d[1], x3
-; CHECK-NEXT: mul x7, x15, x5
-; CHECK-NEXT: add v0.2d, v0.2d, v15.2d
-; CHECK-NEXT: fmov d2, x17
-; CHECK-NEXT: mul x0, x14, x5
; CHECK-NEXT: fmov d4, x6
+; CHECK-NEXT: mul x20, x2, x5
+; CHECK-NEXT: add v0.2d, v0.2d, v15.2d
+; CHECK-NEXT: fmov d3, x7
; CHECK-NEXT: mul x19, x16, x5
+; CHECK-NEXT: mov v4.d[1], x6
+; CHECK-NEXT: fmov d2, x18
+; CHECK-NEXT: mul x0, x14, x5
; CHECK-NEXT: stp q0, q23, [sp, #64] // 32-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Reload
-; CHECK-NEXT: fmov d3, x7
+; CHECK-NEXT: fmov d5, x20
+; CHECK-NEXT: mov v3.d[1], x7
; CHECK-NEXT: ldr q23, [sp, #48] // 16-byte Reload
; CHECK-NEXT: mul x17, x2, x15
; CHECK-NEXT: add v0.2d, v0.2d, v15.2d
; CHECK-NEXT: ldr q15, [sp] // 16-byte Reload
-; CHECK-NEXT: mov v5.d[1], x0
-; CHECK-NEXT: mov v4.d[1], x6
+; CHECK-NEXT: mov v2.d[1], x19
+; CHECK-NEXT: add v13.2d, v13.2d, v4.2d
+; CHECK-NEXT: add v12.2d, v12.2d, v4.2d
; CHECK-NEXT: mul x16, x16, x15
-; CHECK-NEXT: mov v3.d[1], x7
; CHECK-NEXT: add v15.2d, v15.2d, v1.2d
-; CHECK-NEXT: mov v2.d[1], x19
+; CHECK-NEXT: mov v1.16b, v20.16b
+; CHECK-NEXT: mov v5.d[1], x0
; CHECK-NEXT: str q0, [sp, #96] // 16-byte Spill
-; CHECK-NEXT: mov v1.16b, v6.16b
-; CHECK-NEXT: mul x14, x14, x15
-; CHECK-NEXT: mov v6.16b, v20.16b
; CHECK-NEXT: mov v20.16b, v25.16b
-; CHECK-NEXT: fmov d0, x17
+; CHECK-NEXT: mul x14, x14, x15
; CHECK-NEXT: mov v25.16b, v30.16b
-; CHECK-NEXT: add v30.2d, v9.2d, v5.2d
-; CHECK-NEXT: mov v5.16b, v19.16b
-; CHECK-NEXT: mov v19.16b, v24.16b
; CHECK-NEXT: add v11.2d, v11.2d, v3.2d
-; CHECK-NEXT: mov v14.d[1], x16
+; CHECK-NEXT: fmov d0, x17
; CHECK-NEXT: mov v3.16b, v17.16b
; CHECK-NEXT: mov v17.16b, v10.16b
-; CHECK-NEXT: mov v10.16b, v7.16b
+; CHECK-NEXT: mov v10.16b, v6.16b
; CHECK-NEXT: add v8.2d, v8.2d, v2.2d
; CHECK-NEXT: mov v2.16b, v16.16b
-; CHECK-NEXT: mov v0.d[1], x14
+; CHECK-NEXT: mov v14.d[1], x16
; CHECK-NEXT: mov v16.16b, v21.16b
; CHECK-NEXT: mov v21.16b, v26.16b
-; CHECK-NEXT: add v13.2d, v13.2d, v4.2d
+; CHECK-NEXT: add v30.2d, v9.2d, v5.2d
+; CHECK-NEXT: mov v5.16b, v19.16b
; CHECK-NEXT: add v26.2d, v31.2d, v4.2d
-; CHECK-NEXT: add v24.2d, v28.2d, v4.2d
-; CHECK-NEXT: add v19.2d, v19.2d, v4.2d
-; CHECK-NEXT: add v6.2d, v6.2d, v4.2d
+; CHECK-NEXT: mov v0.d[1], x14
+; CHECK-NEXT: add v19.2d, v24.2d, v4.2d
; CHECK-NEXT: add v1.2d, v1.2d, v4.2d
-; CHECK-NEXT: ldp q4, q7, [sp, #16] // 32-byte Folded Reload
+; CHECK-NEXT: add v7.2d, v7.2d, v4.2d
+; CHECK-NEXT: ldp q4, q6, [sp, #16] // 32-byte Folded Reload
; CHECK-NEXT: add v10.2d, v10.2d, v14.2d
; CHECK-NEXT: add v29.2d, v29.2d, v14.2d
; CHECK-NEXT: add v27.2d, v27.2d, v14.2d
; CHECK-NEXT: add v23.2d, v23.2d, v14.2d
; CHECK-NEXT: add v22.2d, v22.2d, v14.2d
; CHECK-NEXT: add v20.2d, v20.2d, v14.2d
+; CHECK-NEXT: add v6.2d, v6.2d, v14.2d
; CHECK-NEXT: add v16.2d, v16.2d, v14.2d
-; CHECK-NEXT: add v7.2d, v7.2d, v14.2d
; CHECK-NEXT: add v5.2d, v5.2d, v14.2d
; CHECK-NEXT: add v3.2d, v3.2d, v14.2d
; CHECK-NEXT: add v2.2d, v2.2d, v14.2d
-; CHECK-NEXT: add v12.2d, v12.2d, v0.2d
+; CHECK-NEXT: add v28.2d, v28.2d, v0.2d
; CHECK-NEXT: add v25.2d, v25.2d, v0.2d
; CHECK-NEXT: add v21.2d, v21.2d, v0.2d
; CHECK-NEXT: add v17.2d, v17.2d, v0.2d
@@ -182,30 +178,30 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
; CHECK-NEXT: mov x14, x13
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.2: // %for.cond.cleanup
-; CHECK-NEXT: ldp q28, q18, [sp, #64] // 32-byte Folded Reload
+; CHECK-NEXT: ldp q24, q18, [sp, #64] // 32-byte Folded Reload
; CHECK-NEXT: adrp x8, C
; CHECK-NEXT: add x8, x8, :lo12:C
; CHECK-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload
; CHECK-NEXT: stp q10, q13, [x8, #64]
-; CHECK-NEXT: stp q28, q18, [x8]
+; CHECK-NEXT: stp q24, q18, [x8]
; CHECK-NEXT: ldr q18, [sp, #96] // 16-byte Reload
-; CHECK-NEXT: stp q29, q12, [x8, #96]
-; CHECK-NEXT: ldp d13, d12, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT: stp q29, q28, [x8, #96]
; CHECK-NEXT: stp q18, q15, [x8, #32]
; CHECK-NEXT: ldp d15, d14, [sp, #112] // 16-byte Folded Reload
; CHECK-NEXT: stp q11, q8, [x8, #144]
; CHECK-NEXT: ldp d9, d8, [sp, #160] // 16-byte Folded Reload
-; CHECK-NEXT: stp q30, q27, [x8, #176]
+; CHECK-NEXT: stp q12, q22, [x8, #272]
; CHECK-NEXT: ldp d11, d10, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT: stp q30, q27, [x8, #176]
; CHECK-NEXT: str q26, [x8, #208]
; CHECK-NEXT: stp q25, q23, [x8, #240]
-; CHECK-NEXT: stp q24, q22, [x8, #272]
; CHECK-NEXT: stp q21, q20, [x8, #304]
-; CHECK-NEXT: stp q19, q7, [x8, #336]
+; CHECK-NEXT: stp q19, q6, [x8, #336]
; CHECK-NEXT: stp q17, q16, [x8, #368]
-; CHECK-NEXT: stp q6, q5, [x8, #400]
+; CHECK-NEXT: stp q1, q5, [x8, #400]
; CHECK-NEXT: stp q4, q3, [x8, #432]
-; CHECK-NEXT: stp q1, q2, [x8, #464]
+; CHECK-NEXT: stp q7, q2, [x8, #464]
; CHECK-NEXT: str q0, [x8, #496]
; CHECK-NEXT: add sp, sp, #192
; CHECK-NEXT: .cfi_def_cfa_offset 0
diff --git a/llvm/test/CodeGen/AArch64/scalar-to-vector-bitcasts.ll b/llvm/test/CodeGen/AArch64/scalar-to-vector-bitcasts.ll
new file mode 100644
index 0000000000000..3e7e4e741622c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/scalar-to-vector-bitcasts.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fullfp16,+fprcvt | FileCheck %s
+
+; These tests ensure bitcasts are correctly emitted for scalar_to_vector
+; by checking if NEON variant of conversion instruction was selected
+
+define <2 x i32> @fcvtzs_v2i32_scalar_to_vector(float %a) {
+; CHECK-LABEL: fcvtzs_v2i32_scalar_to_vector:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: ret
+ %c = fptosi float %a to i32
+ %v = insertelement <2 x i32> poison, i32 %c, i32 0
+ ret <2 x i32> %v
+}
+
+define <4 x i32> @fcvtzs_v4i32_scalar_to_vector(double %a) {
+; CHECK-LABEL: fcvtzs_v4i32_scalar_to_vector:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, d0
+; CHECK-NEXT: ret
+ %c = fptosi double %a to i32
+ %v = insertelement <4 x i32> poison, i32 %c, i32 0
+ ret <4 x i32> %v
+}
+
+define <1 x i64> @fcvtzs_v1i64_scalar_to_vector(half %a) {
+; CHECK-LABEL: fcvtzs_v1i64_scalar_to_vector:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, h0
+; CHECK-NEXT: ret
+ %c = fptosi half %a to i64
+ %v = insertelement <1 x i64> poison, i64 %c, i32 0
+ ret <1 x i64> %v
+}
+
+define <2 x i64> @fcvtzs_v2i64_scalar_to_vector(float %a) {
+; CHECK-LABEL: fcvtzs_v2i64_scalar_to_vector:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: ret
+ %c = fptosi float %a to i64
+ %v = insertelement <2 x i64> poison, i64 %c, i32 0
+ ret <2 x i64> %v
+}
diff --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll
index ef6b65cd50a1e..e6af0256fe6e2 100644
--- a/llvm/test/CodeGen/AArch64/sext.ll
+++ b/llvm/test/CodeGen/AArch64/sext.ll
@@ -1144,47 +1144,47 @@ entry:
define <16 x i64> @sext_v16i10_v16i64(<16 x i10> %a) {
; CHECK-SD-LABEL: sext_v16i10_v16i64:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov s0, w2
-; CHECK-SD-NEXT: fmov s1, w0
-; CHECK-SD-NEXT: ldr s2, [sp]
-; CHECK-SD-NEXT: fmov s3, w4
-; CHECK-SD-NEXT: fmov s4, w6
+; CHECK-SD-NEXT: fmov s0, w6
+; CHECK-SD-NEXT: fmov s2, w4
+; CHECK-SD-NEXT: ldr s1, [sp]
+; CHECK-SD-NEXT: fmov s3, w2
+; CHECK-SD-NEXT: fmov s4, w0
; CHECK-SD-NEXT: add x8, sp, #8
; CHECK-SD-NEXT: ldr s5, [sp, #16]
; CHECK-SD-NEXT: ldr s6, [sp, #32]
; CHECK-SD-NEXT: ldr s7, [sp, #48]
-; CHECK-SD-NEXT: mov v1.s[1], w1
-; CHECK-SD-NEXT: mov v0.s[1], w3
-; CHECK-SD-NEXT: ld1 { v2.s }[1], [x8]
-; CHECK-SD-NEXT: mov v3.s[1], w5
-; CHECK-SD-NEXT: mov v4.s[1], w7
+; CHECK-SD-NEXT: ld1 { v1.s }[1], [x8]
+; CHECK-SD-NEXT: mov v2.s[1], w5
+; CHECK-SD-NEXT: mov v0.s[1], w7
+; CHECK-SD-NEXT: mov v4.s[1], w1
+; CHECK-SD-NEXT: mov v3.s[1], w3
; CHECK-SD-NEXT: add x8, sp, #24
; CHECK-SD-NEXT: add x9, sp, #40
; CHECK-SD-NEXT: add x10, sp, #56
; CHECK-SD-NEXT: ld1 { v5.s }[1], [x8]
; CHECK-SD-NEXT: ld1 { v6.s }[1], [x9]
; CHECK-SD-NEXT: ld1 { v7.s }[1], [x10]
-; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0
; CHECK-SD-NEXT: ushll v4.2d, v4.2s, #0
+; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0
; CHECK-SD-NEXT: ushll v5.2d, v5.2s, #0
; CHECK-SD-NEXT: ushll v6.2d, v6.2s, #0
; CHECK-SD-NEXT: ushll v7.2d, v7.2s, #0
-; CHECK-SD-NEXT: shl v17.2d, v2.2d, #54
-; CHECK-SD-NEXT: shl v1.2d, v1.2d, #54
-; CHECK-SD-NEXT: shl v16.2d, v0.2d, #54
-; CHECK-SD-NEXT: shl v3.2d, v3.2d, #54
+; CHECK-SD-NEXT: shl v16.2d, v1.2d, #54
+; CHECK-SD-NEXT: shl v2.2d, v2.2d, #54
+; CHECK-SD-NEXT: shl v17.2d, v0.2d, #54
; CHECK-SD-NEXT: shl v4.2d, v4.2d, #54
+; CHECK-SD-NEXT: shl v3.2d, v3.2d, #54
; CHECK-SD-NEXT: shl v5.2d, v5.2d, #54
; CHECK-SD-NEXT: shl v6.2d, v6.2d, #54
; CHECK-SD-NEXT: shl v7.2d, v7.2d, #54
-; CHECK-SD-NEXT: sshr v0.2d, v1.2d, #54
-; CHECK-SD-NEXT: sshr v1.2d, v16.2d, #54
-; CHECK-SD-NEXT: sshr v2.2d, v3.2d, #54
-; CHECK-SD-NEXT: sshr v3.2d, v4.2d, #54
-; CHECK-SD-NEXT: sshr v4.2d, v17.2d, #54
+; CHECK-SD-NEXT: sshr v2.2d, v2.2d, #54
+; CHECK-SD-NEXT: sshr v0.2d, v4.2d, #54
+; CHECK-SD-NEXT: sshr v1.2d, v3.2d, #54
+; CHECK-SD-NEXT: sshr v4.2d, v16.2d, #54
+; CHECK-SD-NEXT: sshr v3.2d, v17.2d, #54
; CHECK-SD-NEXT: sshr v5.2d, v5.2d, #54
; CHECK-SD-NEXT: sshr v6.2d, v6.2d, #54
; CHECK-SD-NEXT: sshr v7.2d, v7.2d, #54
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
index a8b2c30bec562..b19767b0de550 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
@@ -57,28 +57,28 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind {
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: frintx v0.4h, v0.4h
; CHECK-NEXT: frintx v1.4h, v1.4h
-; CHECK-NEXT: mov h4, v0.h[2]
-; CHECK-NEXT: mov h2, v0.h[1]
-; CHECK-NEXT: mov h7, v0.h[3]
+; CHECK-NEXT: mov h3, v0.h[2]
+; CHECK-NEXT: mov h4, v0.h[1]
+; CHECK-NEXT: mov h5, v0.h[3]
; CHECK-NEXT: fcvtzs x8, h0
-; CHECK-NEXT: mov h3, v1.h[2]
-; CHECK-NEXT: mov h5, v1.h[3]
-; CHECK-NEXT: mov h6, v1.h[1]
-; CHECK-NEXT: fcvtzs x11, h1
+; CHECK-NEXT: mov h2, v1.h[2]
+; CHECK-NEXT: mov h6, v1.h[3]
+; CHECK-NEXT: mov h7, v1.h[1]
+; CHECK-NEXT: fcvtzs x10, h1
+; CHECK-NEXT: fcvtzs x11, h3
; CHECK-NEXT: fcvtzs x12, h4
-; CHECK-NEXT: fcvtzs x9, h2
-; CHECK-NEXT: fcvtzs x15, h7
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fcvtzs x10, h3
; CHECK-NEXT: fcvtzs x13, h5
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fcvtzs x9, h2
; CHECK-NEXT: fcvtzs x14, h6
-; CHECK-NEXT: fmov d1, x12
-; CHECK-NEXT: fmov d2, x11
-; CHECK-NEXT: mov v0.d[1], x9
-; CHECK-NEXT: fmov d3, x10
-; CHECK-NEXT: mov v1.d[1], x15
-; CHECK-NEXT: mov v2.d[1], x14
-; CHECK-NEXT: mov v3.d[1], x13
+; CHECK-NEXT: fcvtzs x15, h7
+; CHECK-NEXT: fmov d2, x10
+; CHECK-NEXT: fmov d1, x11
+; CHECK-NEXT: mov v0.d[1], x12
+; CHECK-NEXT: fmov d3, x9
+; CHECK-NEXT: mov v1.d[1], x13
+; CHECK-NEXT: mov v2.d[1], x15
+; CHECK-NEXT: mov v3.d[1], x14
; CHECK-NEXT: ret
%a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x)
ret <8 x i64> %a
@@ -89,55 +89,55 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind {
; CHECK-LABEL: llrint_v16i64_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: frintx v0.4h, v0.4h
; CHECK-NEXT: frintx v1.4h, v1.4h
-; CHECK-NEXT: frintx v3.4h, v0.4h
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: frintx v2.4h, v2.4h
+; CHECK-NEXT: frintx v3.4h, v3.4h
+; CHECK-NEXT: mov h5, v0.h[2]
; CHECK-NEXT: mov h4, v1.h[2]
+; CHECK-NEXT: mov h6, v0.h[1]
+; CHECK-NEXT: fcvtzs x8, h1
+; CHECK-NEXT: mov h16, v0.h[3]
+; CHECK-NEXT: fcvtzs x9, h0
+; CHECK-NEXT: mov h7, v1.h[1]
+; CHECK-NEXT: mov h1, v1.h[3]
+; CHECK-NEXT: mov h0, v2.h[3]
+; CHECK-NEXT: mov h17, v2.h[2]
+; CHECK-NEXT: fcvtzs x12, h5
; CHECK-NEXT: mov h5, v3.h[2]
-; CHECK-NEXT: frintx v0.4h, v0.4h
-; CHECK-NEXT: mov h6, v3.h[1]
-; CHECK-NEXT: fcvtzs x9, h3
-; CHECK-NEXT: mov h16, v1.h[1]
-; CHECK-NEXT: fcvtzs x12, h1
-; CHECK-NEXT: mov h3, v3.h[3]
-; CHECK-NEXT: mov h17, v1.h[3]
-; CHECK-NEXT: mov h7, v2.h[3]
-; CHECK-NEXT: fcvtzs x8, h4
-; CHECK-NEXT: fcvtzs x10, h5
-; CHECK-NEXT: mov h4, v2.h[2]
-; CHECK-NEXT: mov h5, v0.h[2]
-; CHECK-NEXT: fcvtzs x11, h6
-; CHECK-NEXT: mov h6, v0.h[3]
-; CHECK-NEXT: fcvtzs x15, h2
-; CHECK-NEXT: mov h2, v2.h[1]
-; CHECK-NEXT: fcvtzs x14, h0
-; CHECK-NEXT: fcvtzs x17, h3
-; CHECK-NEXT: fcvtzs x0, h17
-; CHECK-NEXT: fcvtzs x13, h7
-; CHECK-NEXT: mov h7, v0.h[1]
+; CHECK-NEXT: fcvtzs x11, h2
+; CHECK-NEXT: mov h18, v3.h[3]
+; CHECK-NEXT: fcvtzs x14, h3
+; CHECK-NEXT: mov h3, v3.h[1]
+; CHECK-NEXT: mov h19, v2.h[1]
+; CHECK-NEXT: fcvtzs x10, h4
+; CHECK-NEXT: fmov d4, x8
+; CHECK-NEXT: fcvtzs x13, h6
+; CHECK-NEXT: fcvtzs x15, h0
+; CHECK-NEXT: fcvtzs x8, h17
; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: fcvtzs x16, h4
; CHECK-NEXT: fcvtzs x9, h5
-; CHECK-NEXT: fmov d4, x12
-; CHECK-NEXT: fcvtzs x12, h16
-; CHECK-NEXT: fmov d1, x10
-; CHECK-NEXT: fcvtzs x10, h6
-; CHECK-NEXT: fmov d5, x8
-; CHECK-NEXT: fcvtzs x8, h2
+; CHECK-NEXT: fcvtzs x16, h7
+; CHECK-NEXT: fcvtzs x17, h16
+; CHECK-NEXT: fmov d6, x11
+; CHECK-NEXT: fcvtzs x11, h18
+; CHECK-NEXT: fcvtzs x18, h3
; CHECK-NEXT: fmov d2, x14
-; CHECK-NEXT: fcvtzs x18, h7
-; CHECK-NEXT: fmov d6, x15
-; CHECK-NEXT: mov v0.d[1], x11
+; CHECK-NEXT: fcvtzs x14, h19
+; CHECK-NEXT: fcvtzs x0, h1
+; CHECK-NEXT: fmov d5, x10
+; CHECK-NEXT: fmov d1, x12
+; CHECK-NEXT: fmov d7, x8
; CHECK-NEXT: fmov d3, x9
-; CHECK-NEXT: fmov d7, x16
+; CHECK-NEXT: mov v0.d[1], x13
+; CHECK-NEXT: mov v4.d[1], x16
+; CHECK-NEXT: mov v2.d[1], x18
; CHECK-NEXT: mov v1.d[1], x17
-; CHECK-NEXT: mov v4.d[1], x12
; CHECK-NEXT: mov v5.d[1], x0
-; CHECK-NEXT: mov v6.d[1], x8
-; CHECK-NEXT: mov v2.d[1], x18
-; CHECK-NEXT: mov v3.d[1], x10
-; CHECK-NEXT: mov v7.d[1], x13
+; CHECK-NEXT: mov v6.d[1], x14
+; CHECK-NEXT: mov v3.d[1], x11
+; CHECK-NEXT: mov v7.d[1], x15
; CHECK-NEXT: ret
%a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x)
ret <16 x i64> %a
@@ -324,27 +324,27 @@ declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) nounwind {
; CHECK-LABEL: llrint_v8i64_v8f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: frintx v0.4s, v0.4s
; CHECK-NEXT: frintx v1.4s, v1.4s
-; CHECK-NEXT: mov s3, v1.s[2]
-; CHECK-NEXT: mov s4, v0.s[2]
-; CHECK-NEXT: mov s2, v0.s[1]
+; CHECK-NEXT: frintx v0.4s, v0.4s
+; CHECK-NEXT: mov s2, v1.s[2]
+; CHECK-NEXT: mov s3, v0.s[2]
+; CHECK-NEXT: mov s4, v0.s[1]
; CHECK-NEXT: mov s5, v1.s[3]
; CHECK-NEXT: mov s6, v1.s[1]
; CHECK-NEXT: mov s7, v0.s[3]
; CHECK-NEXT: fcvtzs x8, s0
; CHECK-NEXT: fcvtzs x10, s1
+; CHECK-NEXT: fcvtzs x9, s2
; CHECK-NEXT: fcvtzs x11, s3
; CHECK-NEXT: fcvtzs x12, s4
-; CHECK-NEXT: fcvtzs x9, s2
; CHECK-NEXT: fcvtzs x13, s5
; CHECK-NEXT: fcvtzs x14, s6
; CHECK-NEXT: fcvtzs x15, s7
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: fmov d2, x10
-; CHECK-NEXT: fmov d1, x12
-; CHECK-NEXT: fmov d3, x11
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: fmov d3, x9
+; CHECK-NEXT: fmov d1, x11
+; CHECK-NEXT: mov v0.d[1], x12
; CHECK-NEXT: mov v2.d[1], x14
; CHECK-NEXT: mov v1.d[1], x15
; CHECK-NEXT: mov v3.d[1], x13
@@ -363,48 +363,48 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) nounwind {
; CHECK-NEXT: frintx v0.4s, v0.4s
; CHECK-NEXT: mov s4, v3.s[2]
; CHECK-NEXT: mov s5, v2.s[2]
-; CHECK-NEXT: mov s6, v1.s[2]
-; CHECK-NEXT: mov s7, v0.s[2]
-; CHECK-NEXT: fcvtzs x10, s1
-; CHECK-NEXT: fcvtzs x11, s0
-; CHECK-NEXT: mov s16, v0.s[1]
-; CHECK-NEXT: mov s17, v1.s[1]
-; CHECK-NEXT: mov s18, v3.s[1]
-; CHECK-NEXT: fcvtzs x14, s3
-; CHECK-NEXT: fcvtzs x16, s2
-; CHECK-NEXT: fcvtzs x8, s4
-; CHECK-NEXT: mov s4, v2.s[1]
-; CHECK-NEXT: fcvtzs x9, s5
-; CHECK-NEXT: mov s5, v1.s[3]
-; CHECK-NEXT: fcvtzs x12, s6
-; CHECK-NEXT: mov s6, v0.s[3]
-; CHECK-NEXT: fcvtzs x13, s7
-; CHECK-NEXT: mov s7, v3.s[3]
-; CHECK-NEXT: fmov d0, x11
-; CHECK-NEXT: fcvtzs x17, s16
-; CHECK-NEXT: fcvtzs x18, s18
-; CHECK-NEXT: fcvtzs x15, s4
-; CHECK-NEXT: mov s4, v2.s[3]
-; CHECK-NEXT: fmov d2, x10
+; CHECK-NEXT: mov s6, v2.s[1]
+; CHECK-NEXT: mov s7, v1.s[2]
+; CHECK-NEXT: fcvtzs x8, s3
+; CHECK-NEXT: mov s16, v0.s[2]
+; CHECK-NEXT: fcvtzs x9, s2
+; CHECK-NEXT: mov s17, v1.s[3]
+; CHECK-NEXT: mov s18, v0.s[1]
+; CHECK-NEXT: mov s19, v3.s[3]
+; CHECK-NEXT: fcvtzs x14, s1
+; CHECK-NEXT: mov s1, v1.s[1]
+; CHECK-NEXT: fcvtzs x10, s4
; CHECK-NEXT: fcvtzs x11, s5
-; CHECK-NEXT: fcvtzs x10, s6
-; CHECK-NEXT: fmov d3, x12
-; CHECK-NEXT: fmov d1, x13
-; CHECK-NEXT: fcvtzs x12, s17
+; CHECK-NEXT: mov s5, v0.s[3]
+; CHECK-NEXT: mov s3, v3.s[1]
+; CHECK-NEXT: mov s2, v2.s[3]
+; CHECK-NEXT: fcvtzs x12, s6
; CHECK-NEXT: fcvtzs x13, s7
-; CHECK-NEXT: fmov d5, x9
-; CHECK-NEXT: fmov d6, x14
-; CHECK-NEXT: fmov d7, x8
-; CHECK-NEXT: fcvtzs x0, s4
-; CHECK-NEXT: fmov d4, x16
+; CHECK-NEXT: fcvtzs x15, s16
+; CHECK-NEXT: fmov d6, x8
+; CHECK-NEXT: fcvtzs x8, s0
+; CHECK-NEXT: fmov d4, x9
+; CHECK-NEXT: fcvtzs x9, s17
+; CHECK-NEXT: fcvtzs x16, s5
+; CHECK-NEXT: fcvtzs x17, s18
+; CHECK-NEXT: fmov d7, x10
+; CHECK-NEXT: fmov d5, x11
+; CHECK-NEXT: fcvtzs x10, s1
+; CHECK-NEXT: fcvtzs x11, s19
+; CHECK-NEXT: fcvtzs x18, s3
+; CHECK-NEXT: fcvtzs x0, s2
+; CHECK-NEXT: fmov d3, x13
+; CHECK-NEXT: fmov d1, x15
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fmov d2, x14
+; CHECK-NEXT: mov v4.d[1], x12
+; CHECK-NEXT: mov v3.d[1], x9
+; CHECK-NEXT: mov v7.d[1], x11
; CHECK-NEXT: mov v0.d[1], x17
-; CHECK-NEXT: mov v1.d[1], x10
-; CHECK-NEXT: mov v3.d[1], x11
-; CHECK-NEXT: mov v2.d[1], x12
-; CHECK-NEXT: mov v6.d[1], x18
-; CHECK-NEXT: mov v7.d[1], x13
-; CHECK-NEXT: mov v4.d[1], x15
+; CHECK-NEXT: mov v1.d[1], x16
+; CHECK-NEXT: mov v2.d[1], x10
; CHECK-NEXT: mov v5.d[1], x0
+; CHECK-NEXT: mov v6.d[1], x18
; CHECK-NEXT: ret
%a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x)
ret <16 x i64> %a
@@ -542,8 +542,7 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) nounwind {
; CHECK-LABEL: llrint_v1i64_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx d0, d0
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fcvtzs d0, d0
; CHECK-NEXT: ret
%a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x)
ret <1 x i64> %a
@@ -570,17 +569,15 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) nounwind {
; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: frintx z0.d, p0/m, z0.d
-; CHECK-NEXT: mov z1.d, z0.d[2]
-; CHECK-NEXT: mov z2.d, z0.d[3]
+; CHECK-NEXT: mov z1.d, z0.d[3]
+; CHECK-NEXT: mov z2.d, z0.d[2]
; CHECK-NEXT: mov z3.d, z0.d[1]
-; CHECK-NEXT: fcvtzs x9, d0
+; CHECK-NEXT: fcvtzs d0, d0
; CHECK-NEXT: fcvtzs x8, d1
-; CHECK-NEXT: fcvtzs x10, d2
-; CHECK-NEXT: fcvtzs x11, d3
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: mov v0.d[1], x11
-; CHECK-NEXT: mov v1.d[1], x10
+; CHECK-NEXT: fcvtzs d1, d2
+; CHECK-NEXT: fcvtzs x9, d3
+; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: mov v1.d[1], x8
; CHECK-NEXT: ret
%a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x)
ret <4 x i64> %a
@@ -598,31 +595,27 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) nounwind {
; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT: splice z2.d, p0, z2.d, z3.d
; CHECK-NEXT: ptrue p0.d, vl4
-; CHECK-NEXT: frintx z0.d, p0/m, z0.d
; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: frintx z1.d, p0/m, z2.d
-; CHECK-NEXT: mov z4.d, z1.d[2]
-; CHECK-NEXT: mov z5.d, z0.d[2]
-; CHECK-NEXT: mov z2.d, z0.d[1]
-; CHECK-NEXT: mov z3.d, z1.d[3]
-; CHECK-NEXT: mov z6.d, z0.d[3]
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: mov z0.d, z1.d[1]
-; CHECK-NEXT: fcvtzs x10, d1
-; CHECK-NEXT: fcvtzs x11, d4
-; CHECK-NEXT: fcvtzs x12, d5
-; CHECK-NEXT: fcvtzs x9, d2
-; CHECK-NEXT: fcvtzs x13, d3
-; CHECK-NEXT: fcvtzs x14, d6
-; CHECK-NEXT: fcvtzs x15, d0
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d2, x10
-; CHECK-NEXT: fmov d1, x12
-; CHECK-NEXT: fmov d3, x11
-; CHECK-NEXT: mov v0.d[1], x9
-; CHECK-NEXT: mov v2.d[1], x15
-; CHECK-NEXT: mov v1.d[1], x14
-; CHECK-NEXT: mov v3.d[1], x13
+; CHECK-NEXT: frintx z0.d, p0/m, z0.d
+; CHECK-NEXT: mov z2.d, z1.d[3]
+; CHECK-NEXT: mov z3.d, z0.d[3]
+; CHECK-NEXT: mov z4.d, z0.d[1]
+; CHECK-NEXT: mov z5.d, z1.d[2]
+; CHECK-NEXT: mov z6.d, z0.d[2]
+; CHECK-NEXT: mov z7.d, z1.d[1]
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: fcvtzs x8, d2
+; CHECK-NEXT: fcvtzs x9, d3
+; CHECK-NEXT: fcvtzs x10, d4
+; CHECK-NEXT: fcvtzs d2, d1
+; CHECK-NEXT: fcvtzs d3, d5
+; CHECK-NEXT: fcvtzs d1, d6
+; CHECK-NEXT: fcvtzs x11, d7
+; CHECK-NEXT: mov v0.d[1], x10
+; CHECK-NEXT: mov v1.d[1], x9
+; CHECK-NEXT: mov v3.d[1], x8
+; CHECK-NEXT: mov v2.d[1], x11
; CHECK-NEXT: ret
%a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x)
ret <8 x i64> %a
@@ -632,70 +625,60 @@ declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
define <16 x i64> @llrint_v16f64(<16 x double> %x) nounwind {
; CHECK-LABEL: llrint_v16f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d, vl2
+; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q6 killed $q6 def $z6
; CHECK-NEXT: // kill: def $q4 killed $q4 def $z4
+; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT: // kill: def $q7 killed $q7 def $z7
; CHECK-NEXT: // kill: def $q5 killed $q5 def $z5
-; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT: splice z6.d, p0, z6.d, z7.d
+; CHECK-NEXT: splice z2.d, p0, z2.d, z3.d
+; CHECK-NEXT: splice z4.d, p0, z4.d, z5.d
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ptrue p0.d, vl4
-; CHECK-NEXT: splice z6.d, p1, z6.d, z7.d
-; CHECK-NEXT: splice z4.d, p1, z4.d, z5.d
-; CHECK-NEXT: splice z2.d, p1, z2.d, z3.d
-; CHECK-NEXT: splice z0.d, p1, z0.d, z1.d
-; CHECK-NEXT: movprfx z3, z6
-; CHECK-NEXT: frintx z3.d, p0/m, z6.d
-; CHECK-NEXT: movprfx z1, z4
-; CHECK-NEXT: frintx z1.d, p0/m, z4.d
+; CHECK-NEXT: frintx z6.d, p0/m, z6.d
+; CHECK-NEXT: frintx z4.d, p0/m, z4.d
; CHECK-NEXT: frintx z2.d, p0/m, z2.d
; CHECK-NEXT: frintx z0.d, p0/m, z0.d
-; CHECK-NEXT: mov z4.d, z3.d[2]
-; CHECK-NEXT: mov z5.d, z1.d[2]
-; CHECK-NEXT: mov z6.d, z2.d[3]
-; CHECK-NEXT: fcvtzs x11, d0
-; CHECK-NEXT: fcvtzs x12, d1
-; CHECK-NEXT: fcvtzs x13, d2
-; CHECK-NEXT: fcvtzs x14, d3
-; CHECK-NEXT: mov z7.d, z3.d[3]
-; CHECK-NEXT: mov z16.d, z1.d[3]
-; CHECK-NEXT: fcvtzs x9, d4
+; CHECK-NEXT: mov z1.d, z6.d[3]
+; CHECK-NEXT: mov z3.d, z4.d[3]
+; CHECK-NEXT: mov z5.d, z2.d[3]
+; CHECK-NEXT: mov z16.d, z4.d[1]
+; CHECK-NEXT: mov z7.d, z0.d[3]
+; CHECK-NEXT: mov z17.d, z0.d[2]
+; CHECK-NEXT: mov z18.d, z4.d[2]
+; CHECK-NEXT: mov z19.d, z6.d[1]
+; CHECK-NEXT: fcvtzs d4, d4
+; CHECK-NEXT: fcvtzs x8, d1
+; CHECK-NEXT: mov z1.d, z2.d[1]
+; CHECK-NEXT: fcvtzs x9, d3
+; CHECK-NEXT: mov z3.d, z0.d[1]
; CHECK-NEXT: fcvtzs x10, d5
-; CHECK-NEXT: mov z4.d, z2.d[2]
-; CHECK-NEXT: mov z5.d, z0.d[2]
-; CHECK-NEXT: fcvtzs x8, d6
-; CHECK-NEXT: mov z2.d, z2.d[1]
-; CHECK-NEXT: mov z6.d, z0.d[3]
-; CHECK-NEXT: mov z1.d, z1.d[1]
-; CHECK-NEXT: mov z3.d, z3.d[1]
-; CHECK-NEXT: fcvtzs x15, d4
-; CHECK-NEXT: mov z4.d, z0.d[1]
-; CHECK-NEXT: fmov d0, x11
-; CHECK-NEXT: fcvtzs x16, d5
-; CHECK-NEXT: fcvtzs x11, d2
-; CHECK-NEXT: fmov d2, x13
-; CHECK-NEXT: fcvtzs x17, d7
-; CHECK-NEXT: fcvtzs x18, d16
-; CHECK-NEXT: fcvtzs x0, d3
-; CHECK-NEXT: fcvtzs x13, d4
-; CHECK-NEXT: fmov d4, x12
-; CHECK-NEXT: fcvtzs x12, d6
-; CHECK-NEXT: fmov d6, x14
-; CHECK-NEXT: fcvtzs x14, d1
-; CHECK-NEXT: fmov d3, x15
-; CHECK-NEXT: fmov d1, x16
-; CHECK-NEXT: fmov d5, x10
-; CHECK-NEXT: fmov d7, x9
-; CHECK-NEXT: mov v2.d[1], x11
-; CHECK-NEXT: mov v0.d[1], x13
-; CHECK-NEXT: mov v3.d[1], x8
-; CHECK-NEXT: mov v6.d[1], x0
-; CHECK-NEXT: mov v4.d[1], x14
-; CHECK-NEXT: mov v1.d[1], x12
-; CHECK-NEXT: mov v5.d[1], x18
-; CHECK-NEXT: mov v7.d[1], x17
+; CHECK-NEXT: mov z5.d, z6.d[2]
+; CHECK-NEXT: fcvtzs x12, d16
+; CHECK-NEXT: mov z16.d, z2.d[2]
+; CHECK-NEXT: fcvtzs x11, d7
+; CHECK-NEXT: fcvtzs x13, d1
+; CHECK-NEXT: fcvtzs d1, d17
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: fcvtzs x14, d3
+; CHECK-NEXT: fcvtzs d7, d5
+; CHECK-NEXT: fcvtzs d2, d2
+; CHECK-NEXT: fcvtzs d3, d16
+; CHECK-NEXT: fcvtzs d5, d18
+; CHECK-NEXT: fcvtzs x15, d19
+; CHECK-NEXT: fcvtzs d6, d6
+; CHECK-NEXT: mov v4.d[1], x12
+; CHECK-NEXT: mov v1.d[1], x11
+; CHECK-NEXT: mov v0.d[1], x14
+; CHECK-NEXT: mov v2.d[1], x13
+; CHECK-NEXT: mov v7.d[1], x8
+; CHECK-NEXT: mov v3.d[1], x10
+; CHECK-NEXT: mov v5.d[1], x9
+; CHECK-NEXT: mov v6.d[1], x15
; CHECK-NEXT: ret
%a = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> %x)
ret <16 x i64> %a
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
index 465ba38b17874..edf1027633906 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
@@ -97,17 +97,17 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) nounwind {
; CHECK-i32-NEXT: mov h3, v2.h[7]
; CHECK-i32-NEXT: fcvtzs w12, h4
; CHECK-i32-NEXT: mov h2, v2.h[3]
-; CHECK-i32-NEXT: fcvtzs w13, h0
-; CHECK-i32-NEXT: fmov s0, w9
; CHECK-i32-NEXT: fmov s1, w8
-; CHECK-i32-NEXT: fcvtzs w8, h3
-; CHECK-i32-NEXT: fcvtzs w9, h2
+; CHECK-i32-NEXT: fcvtzs w8, h0
+; CHECK-i32-NEXT: fmov s0, w9
+; CHECK-i32-NEXT: fcvtzs w9, h3
; CHECK-i32-NEXT: mov v0.s[1], w11
; CHECK-i32-NEXT: mov v1.s[1], w10
-; CHECK-i32-NEXT: mov v0.s[2], w13
+; CHECK-i32-NEXT: fcvtzs w10, h2
+; CHECK-i32-NEXT: mov v0.s[2], w8
; CHECK-i32-NEXT: mov v1.s[2], w12
-; CHECK-i32-NEXT: mov v0.s[3], w9
-; CHECK-i32-NEXT: mov v1.s[3], w8
+; CHECK-i32-NEXT: mov v0.s[3], w10
+; CHECK-i32-NEXT: mov v1.s[3], w9
; CHECK-i32-NEXT: ret
;
; CHECK-i64-LABEL: lrint_v8f16:
@@ -115,28 +115,28 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) nounwind {
; CHECK-i64-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-i64-NEXT: frintx v0.4h, v0.4h
; CHECK-i64-NEXT: frintx v1.4h, v1.4h
-; CHECK-i64-NEXT: mov h4, v0.h[2]
-; CHECK-i64-NEXT: mov h2, v0.h[1]
-; CHECK-i64-NEXT: mov h7, v0.h[3]
+; CHECK-i64-NEXT: mov h3, v0.h[2]
+; CHECK-i64-NEXT: mov h4, v0.h[1]
+; CHECK-i64-NEXT: mov h5, v0.h[3]
; CHECK-i64-NEXT: fcvtzs x8, h0
-; CHECK-i64-NEXT: mov h3, v1.h[2]
-; CHECK-i64-NEXT: mov h5, v1.h[3]
-; CHECK-i64-NEXT: mov h6, v1.h[1]
-; CHECK-i64-NEXT: fcvtzs x11, h1
+; CHECK-i64-NEXT: mov h2, v1.h[2]
+; CHECK-i64-NEXT: mov h6, v1.h[3]
+; CHECK-i64-NEXT: mov h7, v1.h[1]
+; CHECK-i64-NEXT: fcvtzs x10, h1
+; CHECK-i64-NEXT: fcvtzs x11, h3
; CHECK-i64-NEXT: fcvtzs x12, h4
-; CHECK-i64-NEXT: fcvtzs x9, h2
-; CHECK-i64-NEXT: fcvtzs x15, h7
-; CHECK-i64-NEXT: fmov d0, x8
-; CHECK-i64-NEXT: fcvtzs x10, h3
; CHECK-i64-NEXT: fcvtzs x13, h5
+; CHECK-i64-NEXT: fmov d0, x8
+; CHECK-i64-NEXT: fcvtzs x9, h2
; CHECK-i64-NEXT: fcvtzs x14, h6
-; CHECK-i64-NEXT: fmov d1, x12
-; CHECK-i64-NEXT: fmov d2, x11
-; CHECK-i64-NEXT: mov v0.d[1], x9
-; CHECK-i64-NEXT: fmov d3, x10
-; CHECK-i64-NEXT: mov v1.d[1], x15
-; CHECK-i64-NEXT: mov v2.d[1], x14
-; CHECK-i64-NEXT: mov v3.d[1], x13
+; CHECK-i64-NEXT: fcvtzs x15, h7
+; CHECK-i64-NEXT: fmov d2, x10
+; CHECK-i64-NEXT: fmov d1, x11
+; CHECK-i64-NEXT: mov v0.d[1], x12
+; CHECK-i64-NEXT: fmov d3, x9
+; CHECK-i64-NEXT: mov v1.d[1], x13
+; CHECK-i64-NEXT: mov v2.d[1], x15
+; CHECK-i64-NEXT: mov v3.d[1], x14
; CHECK-i64-NEXT: ret
%a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x)
ret <8 x iXLen> %a
@@ -147,107 +147,107 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) nounwind {
; CHECK-i32-LABEL: lrint_v16f16:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx v1.8h, v1.8h
-; CHECK-i32-NEXT: frintx v0.8h, v0.8h
-; CHECK-i32-NEXT: mov h3, v1.h[4]
+; CHECK-i32-NEXT: frintx v4.8h, v0.8h
+; CHECK-i32-NEXT: mov h0, v1.h[6]
; CHECK-i32-NEXT: mov h2, v1.h[5]
-; CHECK-i32-NEXT: mov h5, v0.h[4]
-; CHECK-i32-NEXT: mov h4, v1.h[1]
-; CHECK-i32-NEXT: mov h6, v0.h[1]
-; CHECK-i32-NEXT: fcvtzs w11, h0
-; CHECK-i32-NEXT: fcvtzs w14, h1
-; CHECK-i32-NEXT: mov h7, v1.h[6]
+; CHECK-i32-NEXT: mov h3, v1.h[4]
+; CHECK-i32-NEXT: mov h5, v4.h[4]
+; CHECK-i32-NEXT: mov h7, v4.h[1]
+; CHECK-i32-NEXT: fcvtzs w10, h1
+; CHECK-i32-NEXT: fcvtzs w13, h4
+; CHECK-i32-NEXT: mov h6, v1.h[2]
; CHECK-i32-NEXT: mov h16, v1.h[3]
-; CHECK-i32-NEXT: mov h17, v0.h[7]
-; CHECK-i32-NEXT: mov h18, v0.h[3]
-; CHECK-i32-NEXT: fcvtzs w9, h3
-; CHECK-i32-NEXT: mov h3, v0.h[5]
-; CHECK-i32-NEXT: fcvtzs w8, h2
-; CHECK-i32-NEXT: mov h2, v1.h[2]
+; CHECK-i32-NEXT: mov h17, v4.h[7]
+; CHECK-i32-NEXT: fcvtzs w8, h0
+; CHECK-i32-NEXT: mov h0, v1.h[1]
+; CHECK-i32-NEXT: fcvtzs w9, h2
+; CHECK-i32-NEXT: mov h2, v4.h[5]
+; CHECK-i32-NEXT: fcvtzs w11, h3
+; CHECK-i32-NEXT: mov h3, v4.h[6]
; CHECK-i32-NEXT: fcvtzs w12, h5
-; CHECK-i32-NEXT: fcvtzs w10, h4
-; CHECK-i32-NEXT: mov h4, v0.h[6]
-; CHECK-i32-NEXT: mov h5, v0.h[2]
-; CHECK-i32-NEXT: fcvtzs w13, h6
-; CHECK-i32-NEXT: mov h6, v1.h[7]
-; CHECK-i32-NEXT: fmov s0, w11
-; CHECK-i32-NEXT: fcvtzs w16, h7
-; CHECK-i32-NEXT: fcvtzs w15, h3
-; CHECK-i32-NEXT: fmov s3, w9
-; CHECK-i32-NEXT: fcvtzs w9, h16
-; CHECK-i32-NEXT: fcvtzs w17, h2
+; CHECK-i32-NEXT: mov h5, v4.h[2]
+; CHECK-i32-NEXT: fcvtzs w14, h7
+; CHECK-i32-NEXT: mov h7, v1.h[7]
+; CHECK-i32-NEXT: fcvtzs w17, h6
+; CHECK-i32-NEXT: mov h4, v4.h[3]
+; CHECK-i32-NEXT: fcvtzs w15, h0
+; CHECK-i32-NEXT: fmov s0, w13
+; CHECK-i32-NEXT: fcvtzs w16, h2
+; CHECK-i32-NEXT: fmov s2, w10
+; CHECK-i32-NEXT: fcvtzs w10, h3
+; CHECK-i32-NEXT: fmov s3, w11
; CHECK-i32-NEXT: fmov s1, w12
-; CHECK-i32-NEXT: fmov s2, w14
-; CHECK-i32-NEXT: fcvtzs w11, h4
; CHECK-i32-NEXT: fcvtzs w18, h5
-; CHECK-i32-NEXT: mov v0.s[1], w13
-; CHECK-i32-NEXT: mov v3.s[1], w8
-; CHECK-i32-NEXT: fcvtzs w8, h6
-; CHECK-i32-NEXT: fcvtzs w12, h18
-; CHECK-i32-NEXT: mov v1.s[1], w15
-; CHECK-i32-NEXT: mov v2.s[1], w10
-; CHECK-i32-NEXT: fcvtzs w10, h17
+; CHECK-i32-NEXT: mov v0.s[1], w14
+; CHECK-i32-NEXT: fcvtzs w11, h16
+; CHECK-i32-NEXT: fcvtzs w12, h17
+; CHECK-i32-NEXT: mov v2.s[1], w15
+; CHECK-i32-NEXT: fcvtzs w13, h4
+; CHECK-i32-NEXT: mov v1.s[1], w16
+; CHECK-i32-NEXT: mov v3.s[1], w9
+; CHECK-i32-NEXT: fcvtzs w9, h7
; CHECK-i32-NEXT: mov v0.s[2], w18
-; CHECK-i32-NEXT: mov v3.s[2], w16
-; CHECK-i32-NEXT: mov v1.s[2], w11
; CHECK-i32-NEXT: mov v2.s[2], w17
-; CHECK-i32-NEXT: mov v0.s[3], w12
-; CHECK-i32-NEXT: mov v3.s[3], w8
-; CHECK-i32-NEXT: mov v1.s[3], w10
-; CHECK-i32-NEXT: mov v2.s[3], w9
+; CHECK-i32-NEXT: mov v1.s[2], w10
+; CHECK-i32-NEXT: mov v3.s[2], w8
+; CHECK-i32-NEXT: mov v0.s[3], w13
+; CHECK-i32-NEXT: mov v2.s[3], w11
+; CHECK-i32-NEXT: mov v1.s[3], w12
+; CHECK-i32-NEXT: mov v3.s[3], w9
; CHECK-i32-NEXT: ret
;
; CHECK-i64-LABEL: lrint_v16f16:
; CHECK-i64: // %bb.0:
; CHECK-i64-NEXT: ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-i64-NEXT: ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-i64-NEXT: frintx v0.4h, v0.4h
; CHECK-i64-NEXT: frintx v1.4h, v1.4h
-; CHECK-i64-NEXT: frintx v3.4h, v0.4h
-; CHECK-i64-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-i64-NEXT: frintx v2.4h, v2.4h
+; CHECK-i64-NEXT: frintx v3.4h, v3.4h
+; CHECK-i64-NEXT: mov h5, v0.h[2]
; CHECK-i64-NEXT: mov h4, v1.h[2]
+; CHECK-i64-NEXT: mov h6, v0.h[1]
+; CHECK-i64-NEXT: fcvtzs x8, h1
+; CHECK-i64-NEXT: mov h16, v0.h[3]
+; CHECK-i64-NEXT: fcvtzs x9, h0
+; CHECK-i64-NEXT: mov h7, v1.h[1]
+; CHECK-i64-NEXT: mov h1, v1.h[3]
+; CHECK-i64-NEXT: mov h0, v2.h[3]
+; CHECK-i64-NEXT: mov h17, v2.h[2]
+; CHECK-i64-NEXT: fcvtzs x12, h5
; CHECK-i64-NEXT: mov h5, v3.h[2]
-; CHECK-i64-NEXT: frintx v0.4h, v0.4h
-; CHECK-i64-NEXT: mov h6, v3.h[1]
-; CHECK-i64-NEXT: fcvtzs x9, h3
-; CHECK-i64-NEXT: mov h16, v1.h[1]
-; CHECK-i64-NEXT: fcvtzs x12, h1
-; CHECK-i64-NEXT: mov h3, v3.h[3]
-; CHECK-i64-NEXT: mov h17, v1.h[3]
-; CHECK-i64-NEXT: mov h7, v2.h[3]
-; CHECK-i64-NEXT: fcvtzs x8, h4
-; CHECK-i64-NEXT: fcvtzs x10, h5
-; CHECK-i64-NEXT: mov h4, v2.h[2]
-; CHECK-i64-NEXT: mov h5, v0.h[2]
-; CHECK-i64-NEXT: fcvtzs x11, h6
-; CHECK-i64-NEXT: mov h6, v0.h[3]
-; CHECK-i64-NEXT: fcvtzs x15, h2
-; CHECK-i64-NEXT: mov h2, v2.h[1]
-; CHECK-i64-NEXT: fcvtzs x14, h0
-; CHECK-i64-NEXT: fcvtzs x17, h3
-; CHECK-i64-NEXT: fcvtzs x0, h17
-; CHECK-i64-NEXT: fcvtzs x13, h7
-; CHECK-i64-NEXT: mov h7, v0.h[1]
+; CHECK-i64-NEXT: fcvtzs x11, h2
+; CHECK-i64-NEXT: mov h18, v3.h[3]
+; CHECK-i64-NEXT: fcvtzs x14, h3
+; CHECK-i64-NEXT: mov h3, v3.h[1]
+; CHECK-i64-NEXT: mov h19, v2.h[1]
+; CHECK-i64-NEXT: fcvtzs x10, h4
+; CHECK-i64-NEXT: fmov d4, x8
+; CHECK-i64-NEXT: fcvtzs x13, h6
+; CHECK-i64-NEXT: fcvtzs x15, h0
+; CHECK-i64-NEXT: fcvtzs x8, h17
; CHECK-i64-NEXT: fmov d0, x9
-; CHECK-i64-NEXT: fcvtzs x16, h4
; CHECK-i64-NEXT: fcvtzs x9, h5
-; CHECK-i64-NEXT: fmov d4, x12
-; CHECK-i64-NEXT: fcvtzs x12, h16
-; CHECK-i64-NEXT: fmov d1, x10
-; CHECK-i64-NEXT: fcvtzs x10, h6
-; CHECK-i64-NEXT: fmov d5, x8
-; CHECK-i64-NEXT: fcvtzs x8, h2
+; CHECK-i64-NEXT: fcvtzs x16, h7
+; CHECK-i64-NEXT: fcvtzs x17, h16
+; CHECK-i64-NEXT: fmov d6, x11
+; CHECK-i64-NEXT: fcvtzs x11, h18
+; CHECK-i64-NEXT: fcvtzs x18, h3
; CHECK-i64-NEXT: fmov d2, x14
-; CHECK-i64-NEXT: fcvtzs x18, h7
-; CHECK-i64-NEXT: fmov d6, x15
-; CHECK-i64-NEXT: mov v0.d[1], x11
+; CHECK-i64-NEXT: fcvtzs x14, h19
+; CHECK-i64-NEXT: fcvtzs x0, h1
+; CHECK-i64-NEXT: fmov d5, x10
+; CHECK-i64-NEXT: fmov d1, x12
+; CHECK-i64-NEXT: fmov d7, x8
; CHECK-i64-NEXT: fmov d3, x9
-; CHECK-i64-NEXT: fmov d7, x16
+; CHECK-i64-NEXT: mov v0.d[1], x13
+; CHECK-i64-NEXT: mov v4.d[1], x16
+; CHECK-i64-NEXT: mov v2.d[1], x18
; CHECK-i64-NEXT: mov v1.d[1], x17
-; CHECK-i64-NEXT: mov v4.d[1], x12
; CHECK-i64-NEXT: mov v5.d[1], x0
-; CHECK-i64-NEXT: mov v6.d[1], x8
-; CHECK-i64-NEXT: mov v2.d[1], x18
-; CHECK-i64-NEXT: mov v3.d[1], x10
-; CHECK-i64-NEXT: mov v7.d[1], x13
+; CHECK-i64-NEXT: mov v6.d[1], x14
+; CHECK-i64-NEXT: mov v3.d[1], x11
+; CHECK-i64-NEXT: mov v7.d[1], x15
; CHECK-i64-NEXT: ret
%a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x)
ret <16 x iXLen> %a
@@ -257,110 +257,104 @@ declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>)
define <32 x iXLen> @lrint_v32f16(<32 x half> %x) nounwind {
; CHECK-i32-LABEL: lrint_v32f16:
; CHECK-i32: // %bb.0:
-; CHECK-i32-NEXT: stp x26, x25, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-i32-NEXT: stp x20, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-i32-NEXT: frintx v3.8h, v3.8h
; CHECK-i32-NEXT: frintx v2.8h, v2.8h
-; CHECK-i32-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-i32-NEXT: frintx v1.8h, v1.8h
-; CHECK-i32-NEXT: frintx v0.8h, v0.8h
-; CHECK-i32-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
-; CHECK-i32-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
+; CHECK-i32-NEXT: mov h16, v3.h[3]
+; CHECK-i32-NEXT: mov h17, v3.h[2]
; CHECK-i32-NEXT: mov h4, v3.h[7]
; CHECK-i32-NEXT: mov h5, v3.h[6]
; CHECK-i32-NEXT: mov h6, v3.h[5]
; CHECK-i32-NEXT: mov h7, v3.h[4]
-; CHECK-i32-NEXT: mov h16, v3.h[3]
-; CHECK-i32-NEXT: mov h17, v3.h[2]
; CHECK-i32-NEXT: mov h18, v3.h[1]
-; CHECK-i32-NEXT: mov h19, v2.h[7]
-; CHECK-i32-NEXT: fcvtzs w1, h3
-; CHECK-i32-NEXT: mov h3, v1.h[6]
-; CHECK-i32-NEXT: fcvtzs w7, h2
-; CHECK-i32-NEXT: fcvtzs w22, h0
+; CHECK-i32-NEXT: fcvtzs w13, h3
+; CHECK-i32-NEXT: mov h3, v2.h[7]
+; CHECK-i32-NEXT: mov h19, v2.h[4]
+; CHECK-i32-NEXT: fcvtzs w18, h2
+; CHECK-i32-NEXT: mov h20, v2.h[3]
+; CHECK-i32-NEXT: fcvtzs w9, h16
+; CHECK-i32-NEXT: fcvtzs w11, h17
+; CHECK-i32-NEXT: mov h16, v2.h[1]
+; CHECK-i32-NEXT: frintx v17.8h, v0.8h
; CHECK-i32-NEXT: fcvtzs w8, h4
; CHECK-i32-NEXT: mov h4, v2.h[6]
+; CHECK-i32-NEXT: mov h0, v1.h[6]
; CHECK-i32-NEXT: fcvtzs w10, h5
; CHECK-i32-NEXT: mov h5, v2.h[5]
+; CHECK-i32-NEXT: mov h21, v2.h[2]
+; CHECK-i32-NEXT: mov h2, v1.h[4]
+; CHECK-i32-NEXT: fcvtzs w15, h7
+; CHECK-i32-NEXT: fcvtzs w1, h16
; CHECK-i32-NEXT: fcvtzs w12, h6
-; CHECK-i32-NEXT: mov h6, v2.h[4]
-; CHECK-i32-NEXT: fcvtzs w13, h7
-; CHECK-i32-NEXT: mov h7, v2.h[3]
-; CHECK-i32-NEXT: fcvtzs w9, h16
-; CHECK-i32-NEXT: fcvtzs w11, h17
-; CHECK-i32-NEXT: mov h16, v2.h[2]
-; CHECK-i32-NEXT: mov h17, v2.h[1]
-; CHECK-i32-NEXT: fcvtzs w17, h4
-; CHECK-i32-NEXT: mov h4, v1.h[5]
-; CHECK-i32-NEXT: mov h2, v0.h[5]
+; CHECK-i32-NEXT: fcvtzs w17, h19
+; CHECK-i32-NEXT: mov h16, v17.h[4]
+; CHECK-i32-NEXT: fcvtzs w14, h18
+; CHECK-i32-NEXT: fmov s6, w13
+; CHECK-i32-NEXT: fcvtzs w13, h3
+; CHECK-i32-NEXT: fcvtzs w16, h4
+; CHECK-i32-NEXT: mov h3, v1.h[5]
+; CHECK-i32-NEXT: mov h18, v17.h[5]
+; CHECK-i32-NEXT: fmov s4, w18
+; CHECK-i32-NEXT: fcvtzs w18, h0
+; CHECK-i32-NEXT: mov h0, v17.h[1]
+; CHECK-i32-NEXT: mov h19, v1.h[1]
+; CHECK-i32-NEXT: fcvtzs w2, h2
+; CHECK-i32-NEXT: mov h2, v1.h[2]
+; CHECK-i32-NEXT: fcvtzs w4, h1
+; CHECK-i32-NEXT: fcvtzs w6, h16
+; CHECK-i32-NEXT: fcvtzs w7, h17
+; CHECK-i32-NEXT: fmov s7, w15
; CHECK-i32-NEXT: fcvtzs w0, h5
-; CHECK-i32-NEXT: fcvtzs w3, h6
-; CHECK-i32-NEXT: mov h5, v1.h[4]
-; CHECK-i32-NEXT: mov h6, v0.h[4]
-; CHECK-i32-NEXT: fcvtzs w16, h7
-; CHECK-i32-NEXT: mov h7, v0.h[1]
-; CHECK-i32-NEXT: fcvtzs w15, h18
-; CHECK-i32-NEXT: fcvtzs w2, h3
-; CHECK-i32-NEXT: mov h3, v1.h[2]
-; CHECK-i32-NEXT: fcvtzs w19, h4
-; CHECK-i32-NEXT: mov h4, v1.h[1]
-; CHECK-i32-NEXT: mov h18, v0.h[6]
-; CHECK-i32-NEXT: fcvtzs w20, h5
-; CHECK-i32-NEXT: fcvtzs w23, h2
-; CHECK-i32-NEXT: mov h2, v0.h[2]
-; CHECK-i32-NEXT: fcvtzs w21, h6
-; CHECK-i32-NEXT: fcvtzs w25, h1
-; CHECK-i32-NEXT: fcvtzs w4, h17
-; CHECK-i32-NEXT: fcvtzs w24, h7
-; CHECK-i32-NEXT: fcvtzs w14, h19
-; CHECK-i32-NEXT: fcvtzs w18, h16
-; CHECK-i32-NEXT: fcvtzs w26, h4
+; CHECK-i32-NEXT: fcvtzs w15, h20
+; CHECK-i32-NEXT: fcvtzs w3, h3
+; CHECK-i32-NEXT: mov h20, v17.h[6]
+; CHECK-i32-NEXT: fcvtzs w5, h18
+; CHECK-i32-NEXT: mov h18, v17.h[2]
+; CHECK-i32-NEXT: fcvtzs w19, h0
+; CHECK-i32-NEXT: fcvtzs w20, h19
+; CHECK-i32-NEXT: fmov s5, w17
+; CHECK-i32-NEXT: fcvtzs w17, h21
; CHECK-i32-NEXT: mov h16, v1.h[7]
-; CHECK-i32-NEXT: mov h17, v1.h[3]
-; CHECK-i32-NEXT: fcvtzs w5, h3
-; CHECK-i32-NEXT: mov h19, v0.h[7]
+; CHECK-i32-NEXT: fmov s3, w2
+; CHECK-i32-NEXT: mov h21, v1.h[3]
+; CHECK-i32-NEXT: fcvtzs w2, h2
+; CHECK-i32-NEXT: fmov s2, w4
+; CHECK-i32-NEXT: fmov s1, w6
+; CHECK-i32-NEXT: fmov s0, w7
+; CHECK-i32-NEXT: mov h19, v17.h[7]
+; CHECK-i32-NEXT: fcvtzs w4, h20
; CHECK-i32-NEXT: fcvtzs w6, h18
-; CHECK-i32-NEXT: mov h18, v0.h[3]
-; CHECK-i32-NEXT: fmov s0, w22
-; CHECK-i32-NEXT: fmov s1, w21
-; CHECK-i32-NEXT: fcvtzs w21, h2
-; CHECK-i32-NEXT: fmov s2, w25
-; CHECK-i32-NEXT: fmov s3, w20
-; CHECK-i32-NEXT: fmov s4, w7
-; CHECK-i32-NEXT: fmov s5, w3
-; CHECK-i32-NEXT: fmov s6, w1
-; CHECK-i32-NEXT: fmov s7, w13
-; CHECK-i32-NEXT: mov v0.s[1], w24
-; CHECK-i32-NEXT: mov v1.s[1], w23
-; CHECK-i32-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
-; CHECK-i32-NEXT: mov v2.s[1], w26
-; CHECK-i32-NEXT: mov v3.s[1], w19
-; CHECK-i32-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-i32-NEXT: mov v4.s[1], w4
+; CHECK-i32-NEXT: mov h17, v17.h[3]
+; CHECK-i32-NEXT: mov v3.s[1], w3
+; CHECK-i32-NEXT: mov v1.s[1], w5
+; CHECK-i32-NEXT: mov v2.s[1], w20
+; CHECK-i32-NEXT: mov v4.s[1], w1
+; CHECK-i32-NEXT: mov v0.s[1], w19
; CHECK-i32-NEXT: mov v5.s[1], w0
-; CHECK-i32-NEXT: mov v6.s[1], w15
+; CHECK-i32-NEXT: mov v6.s[1], w14
; CHECK-i32-NEXT: mov v7.s[1], w12
; CHECK-i32-NEXT: fcvtzs w12, h16
-; CHECK-i32-NEXT: fcvtzs w13, h17
-; CHECK-i32-NEXT: fcvtzs w15, h19
-; CHECK-i32-NEXT: fcvtzs w0, h18
-; CHECK-i32-NEXT: mov v0.s[2], w21
-; CHECK-i32-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-i32-NEXT: mov v1.s[2], w6
-; CHECK-i32-NEXT: mov v2.s[2], w5
-; CHECK-i32-NEXT: mov v3.s[2], w2
-; CHECK-i32-NEXT: mov v4.s[2], w18
-; CHECK-i32-NEXT: mov v5.s[2], w17
+; CHECK-i32-NEXT: fcvtzs w14, h21
+; CHECK-i32-NEXT: fcvtzs w0, h19
+; CHECK-i32-NEXT: fcvtzs w1, h17
+; CHECK-i32-NEXT: mov v3.s[2], w18
+; CHECK-i32-NEXT: mov v1.s[2], w4
+; CHECK-i32-NEXT: mov v2.s[2], w2
+; CHECK-i32-NEXT: mov v4.s[2], w17
+; CHECK-i32-NEXT: mov v0.s[2], w6
+; CHECK-i32-NEXT: mov v5.s[2], w16
; CHECK-i32-NEXT: mov v6.s[2], w11
; CHECK-i32-NEXT: mov v7.s[2], w10
-; CHECK-i32-NEXT: mov v0.s[3], w0
-; CHECK-i32-NEXT: mov v1.s[3], w15
-; CHECK-i32-NEXT: mov v2.s[3], w13
; CHECK-i32-NEXT: mov v3.s[3], w12
-; CHECK-i32-NEXT: mov v4.s[3], w16
-; CHECK-i32-NEXT: mov v5.s[3], w14
+; CHECK-i32-NEXT: mov v1.s[3], w0
+; CHECK-i32-NEXT: mov v2.s[3], w14
+; CHECK-i32-NEXT: mov v4.s[3], w15
+; CHECK-i32-NEXT: mov v0.s[3], w1
+; CHECK-i32-NEXT: mov v5.s[3], w13
; CHECK-i32-NEXT: mov v6.s[3], w9
; CHECK-i32-NEXT: mov v7.s[3], w8
-; CHECK-i32-NEXT: ldp x26, x25, [sp], #64 // 16-byte Folded Reload
+; CHECK-i32-NEXT: ldp x20, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-i32-NEXT: ret
;
; CHECK-i64-LABEL: lrint_v32f16:
@@ -567,54 +561,52 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) nounwind {
; CHECK-i32-NEXT: ptrue p0.s, vl8
; CHECK-i32-NEXT: movprfx z2, z0
; CHECK-i32-NEXT: frintx z2.s, p0/m, z0.s
-; CHECK-i32-NEXT: mov z0.s, z2.s[4]
-; CHECK-i32-NEXT: mov z1.s, z2.s[5]
+; CHECK-i32-NEXT: mov z0.s, z2.s[5]
+; CHECK-i32-NEXT: mov z1.s, z2.s[4]
; CHECK-i32-NEXT: mov z3.s, z2.s[1]
-; CHECK-i32-NEXT: fcvtzs w9, s2
+; CHECK-i32-NEXT: mov z4.s, z2.s[6]
+; CHECK-i32-NEXT: mov z5.s, z2.s[2]
; CHECK-i32-NEXT: fcvtzs w8, s0
-; CHECK-i32-NEXT: mov z0.s, z2.s[6]
-; CHECK-i32-NEXT: fcvtzs w10, s1
-; CHECK-i32-NEXT: mov z1.s, z2.s[2]
-; CHECK-i32-NEXT: fcvtzs w11, s3
+; CHECK-i32-NEXT: fcvtzs s1, s1
+; CHECK-i32-NEXT: fcvtzs w9, s3
+; CHECK-i32-NEXT: fcvtzs s0, s2
+; CHECK-i32-NEXT: fcvtzs w10, s4
+; CHECK-i32-NEXT: fcvtzs w11, s5
; CHECK-i32-NEXT: mov z3.s, z2.s[7]
; CHECK-i32-NEXT: mov z2.s, z2.s[3]
-; CHECK-i32-NEXT: fcvtzs w12, s0
-; CHECK-i32-NEXT: fmov s0, w9
-; CHECK-i32-NEXT: fcvtzs w13, s1
-; CHECK-i32-NEXT: fmov s1, w8
+; CHECK-i32-NEXT: mov v1.s[1], w8
+; CHECK-i32-NEXT: mov v0.s[1], w9
; CHECK-i32-NEXT: fcvtzs w8, s3
; CHECK-i32-NEXT: fcvtzs w9, s2
-; CHECK-i32-NEXT: mov v0.s[1], w11
-; CHECK-i32-NEXT: mov v1.s[1], w10
-; CHECK-i32-NEXT: mov v0.s[2], w13
-; CHECK-i32-NEXT: mov v1.s[2], w12
-; CHECK-i32-NEXT: mov v0.s[3], w9
+; CHECK-i32-NEXT: mov v1.s[2], w10
+; CHECK-i32-NEXT: mov v0.s[2], w11
; CHECK-i32-NEXT: mov v1.s[3], w8
+; CHECK-i32-NEXT: mov v0.s[3], w9
; CHECK-i32-NEXT: ret
;
; CHECK-i64-LABEL: lrint_v8f32:
; CHECK-i64: // %bb.0:
-; CHECK-i64-NEXT: frintx v0.4s, v0.4s
; CHECK-i64-NEXT: frintx v1.4s, v1.4s
-; CHECK-i64-NEXT: mov s3, v1.s[2]
-; CHECK-i64-NEXT: mov s4, v0.s[2]
-; CHECK-i64-NEXT: mov s2, v0.s[1]
+; CHECK-i64-NEXT: frintx v0.4s, v0.4s
+; CHECK-i64-NEXT: mov s2, v1.s[2]
+; CHECK-i64-NEXT: mov s3, v0.s[2]
+; CHECK-i64-NEXT: mov s4, v0.s[1]
; CHECK-i64-NEXT: mov s5, v1.s[3]
; CHECK-i64-NEXT: mov s6, v1.s[1]
; CHECK-i64-NEXT: mov s7, v0.s[3]
; CHECK-i64-NEXT: fcvtzs x8, s0
; CHECK-i64-NEXT: fcvtzs x10, s1
+; CHECK-i64-NEXT: fcvtzs x9, s2
; CHECK-i64-NEXT: fcvtzs x11, s3
; CHECK-i64-NEXT: fcvtzs x12, s4
-; CHECK-i64-NEXT: fcvtzs x9, s2
; CHECK-i64-NEXT: fcvtzs x13, s5
; CHECK-i64-NEXT: fcvtzs x14, s6
; CHECK-i64-NEXT: fcvtzs x15, s7
; CHECK-i64-NEXT: fmov d0, x8
; CHECK-i64-NEXT: fmov d2, x10
-; CHECK-i64-NEXT: fmov d1, x12
-; CHECK-i64-NEXT: fmov d3, x11
-; CHECK-i64-NEXT: mov v0.d[1], x9
+; CHECK-i64-NEXT: fmov d3, x9
+; CHECK-i64-NEXT: fmov d1, x11
+; CHECK-i64-NEXT: mov v0.d[1], x12
; CHECK-i64-NEXT: mov v2.d[1], x14
; CHECK-i64-NEXT: mov v1.d[1], x15
; CHECK-i64-NEXT: mov v3.d[1], x13
@@ -629,61 +621,58 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) nounwind {
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: ptrue p0.d, vl2
; CHECK-i32-NEXT: // kill: def $q2 killed $q2 def $z2
-; CHECK-i32-NEXT: // kill: def $q3 killed $q3 def $z3
; CHECK-i32-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-i32-NEXT: // kill: def $q3 killed $q3 def $z3
; CHECK-i32-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-i32-NEXT: splice z2.d, p0, z2.d, z3.d
; CHECK-i32-NEXT: splice z0.d, p0, z0.d, z1.d
; CHECK-i32-NEXT: ptrue p0.s, vl8
-; CHECK-i32-NEXT: movprfx z1, z2
-; CHECK-i32-NEXT: frintx z1.s, p0/m, z2.s
-; CHECK-i32-NEXT: frintx z0.s, p0/m, z0.s
-; CHECK-i32-NEXT: mov z2.s, z1.s[5]
-; CHECK-i32-NEXT: mov z3.s, z1.s[4]
-; CHECK-i32-NEXT: mov z5.s, z0.s[5]
-; CHECK-i32-NEXT: mov z7.s, z0.s[1]
-; CHECK-i32-NEXT: fcvtzs w11, s0
-; CHECK-i32-NEXT: fcvtzs w13, s1
-; CHECK-i32-NEXT: mov z4.s, z1.s[7]
-; CHECK-i32-NEXT: mov z6.s, z1.s[6]
-; CHECK-i32-NEXT: mov z16.s, z0.s[7]
-; CHECK-i32-NEXT: fcvtzs w8, s2
-; CHECK-i32-NEXT: mov z2.s, z0.s[4]
-; CHECK-i32-NEXT: fcvtzs w9, s3
-; CHECK-i32-NEXT: mov z3.s, z1.s[1]
-; CHECK-i32-NEXT: fcvtzs w10, s5
-; CHECK-i32-NEXT: fcvtzs w12, s7
-; CHECK-i32-NEXT: mov z5.s, z0.s[6]
-; CHECK-i32-NEXT: mov z7.s, z1.s[2]
-; CHECK-i32-NEXT: mov z17.s, z1.s[3]
-; CHECK-i32-NEXT: fcvtzs w14, s2
-; CHECK-i32-NEXT: mov z2.s, z0.s[2]
-; CHECK-i32-NEXT: mov z18.s, z0.s[3]
-; CHECK-i32-NEXT: fcvtzs w15, s3
-; CHECK-i32-NEXT: fmov s0, w11
-; CHECK-i32-NEXT: fmov s3, w9
-; CHECK-i32-NEXT: fcvtzs w16, s6
-; CHECK-i32-NEXT: fcvtzs w17, s5
+; CHECK-i32-NEXT: movprfx z4, z2
+; CHECK-i32-NEXT: frintx z4.s, p0/m, z2.s
+; CHECK-i32-NEXT: movprfx z5, z0
+; CHECK-i32-NEXT: frintx z5.s, p0/m, z0.s
+; CHECK-i32-NEXT: mov z0.s, z4.s[5]
+; CHECK-i32-NEXT: mov z1.s, z5.s[5]
+; CHECK-i32-NEXT: mov z3.s, z4.s[4]
+; CHECK-i32-NEXT: mov z2.s, z4.s[1]
+; CHECK-i32-NEXT: mov z7.s, z5.s[1]
+; CHECK-i32-NEXT: mov z17.s, z5.s[4]
+; CHECK-i32-NEXT: mov z6.s, z4.s[6]
+; CHECK-i32-NEXT: mov z16.s, z5.s[6]
+; CHECK-i32-NEXT: mov z18.s, z4.s[2]
+; CHECK-i32-NEXT: fcvtzs w8, s0
+; CHECK-i32-NEXT: fcvtzs w9, s1
+; CHECK-i32-NEXT: fcvtzs s0, s5
+; CHECK-i32-NEXT: fcvtzs w10, s2
; CHECK-i32-NEXT: fcvtzs w11, s7
-; CHECK-i32-NEXT: fcvtzs w18, s2
-; CHECK-i32-NEXT: fmov s2, w13
-; CHECK-i32-NEXT: fcvtzs w9, s16
-; CHECK-i32-NEXT: fmov s1, w14
-; CHECK-i32-NEXT: mov v0.s[1], w12
+; CHECK-i32-NEXT: fcvtzs s2, s4
+; CHECK-i32-NEXT: fcvtzs s3, s3
+; CHECK-i32-NEXT: fcvtzs s1, s17
+; CHECK-i32-NEXT: mov z19.s, z5.s[2]
+; CHECK-i32-NEXT: fcvtzs w12, s6
+; CHECK-i32-NEXT: fcvtzs w13, s16
+; CHECK-i32-NEXT: fcvtzs w14, s18
+; CHECK-i32-NEXT: mov z6.s, z4.s[7]
+; CHECK-i32-NEXT: mov z7.s, z5.s[7]
+; CHECK-i32-NEXT: mov z4.s, z4.s[3]
+; CHECK-i32-NEXT: fcvtzs w15, s19
+; CHECK-i32-NEXT: mov v0.s[1], w11
+; CHECK-i32-NEXT: mov v2.s[1], w10
+; CHECK-i32-NEXT: mov v1.s[1], w9
; CHECK-i32-NEXT: mov v3.s[1], w8
-; CHECK-i32-NEXT: fcvtzs w8, s4
-; CHECK-i32-NEXT: fcvtzs w12, s18
-; CHECK-i32-NEXT: mov v2.s[1], w15
-; CHECK-i32-NEXT: mov v1.s[1], w10
-; CHECK-i32-NEXT: fcvtzs w10, s17
-; CHECK-i32-NEXT: mov v0.s[2], w18
-; CHECK-i32-NEXT: mov v3.s[2], w16
-; CHECK-i32-NEXT: mov v2.s[2], w11
-; CHECK-i32-NEXT: mov v1.s[2], w17
-; CHECK-i32-NEXT: mov v0.s[3], w12
-; CHECK-i32-NEXT: mov v3.s[3], w8
+; CHECK-i32-NEXT: mov z5.s, z5.s[3]
+; CHECK-i32-NEXT: fcvtzs w8, s6
+; CHECK-i32-NEXT: fcvtzs w9, s7
+; CHECK-i32-NEXT: fcvtzs w10, s4
+; CHECK-i32-NEXT: fcvtzs w11, s5
+; CHECK-i32-NEXT: mov v0.s[2], w15
+; CHECK-i32-NEXT: mov v2.s[2], w14
+; CHECK-i32-NEXT: mov v1.s[2], w13
+; CHECK-i32-NEXT: mov v3.s[2], w12
+; CHECK-i32-NEXT: mov v0.s[3], w11
; CHECK-i32-NEXT: mov v2.s[3], w10
; CHECK-i32-NEXT: mov v1.s[3], w9
+; CHECK-i32-NEXT: mov v3.s[3], w8
; CHECK-i32-NEXT: ret
;
; CHECK-i64-LABEL: lrint_v16f32:
@@ -694,48 +683,48 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) nounwind {
; CHECK-i64-NEXT: frintx v0.4s, v0.4s
; CHECK-i64-NEXT: mov s4, v3.s[2]
; CHECK-i64-NEXT: mov s5, v2.s[2]
-; CHECK-i64-NEXT: mov s6, v1.s[2]
-; CHECK-i64-NEXT: mov s7, v0.s[2]
-; CHECK-i64-NEXT: fcvtzs x10, s1
-; CHECK-i64-NEXT: fcvtzs x11, s0
-; CHECK-i64-NEXT: mov s16, v0.s[1]
-; CHECK-i64-NEXT: mov s17, v1.s[1]
-; CHECK-i64-NEXT: mov s18, v3.s[1]
-; CHECK-i64-NEXT: fcvtzs x14, s3
-; CHECK-i64-NEXT: fcvtzs x16, s2
-; CHECK-i64-NEXT: fcvtzs x8, s4
-; CHECK-i64-NEXT: mov s4, v2.s[1]
-; CHECK-i64-NEXT: fcvtzs x9, s5
-; CHECK-i64-NEXT: mov s5, v1.s[3]
-; CHECK-i64-NEXT: fcvtzs x12, s6
-; CHECK-i64-NEXT: mov s6, v0.s[3]
-; CHECK-i64-NEXT: fcvtzs x13, s7
-; CHECK-i64-NEXT: mov s7, v3.s[3]
-; CHECK-i64-NEXT: fmov d0, x11
-; CHECK-i64-NEXT: fcvtzs x17, s16
-; CHECK-i64-NEXT: fcvtzs x18, s18
-; CHECK-i64-NEXT: fcvtzs x15, s4
-; CHECK-i64-NEXT: mov s4, v2.s[3]
-; CHECK-i64-NEXT: fmov d2, x10
+; CHECK-i64-NEXT: mov s6, v2.s[1]
+; CHECK-i64-NEXT: mov s7, v1.s[2]
+; CHECK-i64-NEXT: fcvtzs x8, s3
+; CHECK-i64-NEXT: mov s16, v0.s[2]
+; CHECK-i64-NEXT: fcvtzs x9, s2
+; CHECK-i64-NEXT: mov s17, v1.s[3]
+; CHECK-i64-NEXT: mov s18, v0.s[1]
+; CHECK-i64-NEXT: mov s19, v3.s[3]
+; CHECK-i64-NEXT: fcvtzs x14, s1
+; CHECK-i64-NEXT: mov s1, v1.s[1]
+; CHECK-i64-NEXT: fcvtzs x10, s4
; CHECK-i64-NEXT: fcvtzs x11, s5
-; CHECK-i64-NEXT: fcvtzs x10, s6
-; CHECK-i64-NEXT: fmov d3, x12
-; CHECK-i64-NEXT: fmov d1, x13
-; CHECK-i64-NEXT: fcvtzs x12, s17
+; CHECK-i64-NEXT: mov s5, v0.s[3]
+; CHECK-i64-NEXT: mov s3, v3.s[1]
+; CHECK-i64-NEXT: mov s2, v2.s[3]
+; CHECK-i64-NEXT: fcvtzs x12, s6
; CHECK-i64-NEXT: fcvtzs x13, s7
-; CHECK-i64-NEXT: fmov d5, x9
-; CHECK-i64-NEXT: fmov d6, x14
-; CHECK-i64-NEXT: fmov d7, x8
-; CHECK-i64-NEXT: fcvtzs x0, s4
-; CHECK-i64-NEXT: fmov d4, x16
+; CHECK-i64-NEXT: fcvtzs x15, s16
+; CHECK-i64-NEXT: fmov d6, x8
+; CHECK-i64-NEXT: fcvtzs x8, s0
+; CHECK-i64-NEXT: fmov d4, x9
+; CHECK-i64-NEXT: fcvtzs x9, s17
+; CHECK-i64-NEXT: fcvtzs x16, s5
+; CHECK-i64-NEXT: fcvtzs x17, s18
+; CHECK-i64-NEXT: fmov d7, x10
+; CHECK-i64-NEXT: fmov d5, x11
+; CHECK-i64-NEXT: fcvtzs x10, s1
+; CHECK-i64-NEXT: fcvtzs x11, s19
+; CHECK-i64-NEXT: fcvtzs x18, s3
+; CHECK-i64-NEXT: fcvtzs x0, s2
+; CHECK-i64-NEXT: fmov d3, x13
+; CHECK-i64-NEXT: fmov d1, x15
+; CHECK-i64-NEXT: fmov d0, x8
+; CHECK-i64-NEXT: fmov d2, x14
+; CHECK-i64-NEXT: mov v4.d[1], x12
+; CHECK-i64-NEXT: mov v3.d[1], x9
+; CHECK-i64-NEXT: mov v7.d[1], x11
; CHECK-i64-NEXT: mov v0.d[1], x17
-; CHECK-i64-NEXT: mov v1.d[1], x10
-; CHECK-i64-NEXT: mov v3.d[1], x11
-; CHECK-i64-NEXT: mov v2.d[1], x12
-; CHECK-i64-NEXT: mov v6.d[1], x18
-; CHECK-i64-NEXT: mov v7.d[1], x13
-; CHECK-i64-NEXT: mov v4.d[1], x15
+; CHECK-i64-NEXT: mov v1.d[1], x16
+; CHECK-i64-NEXT: mov v2.d[1], x10
; CHECK-i64-NEXT: mov v5.d[1], x0
+; CHECK-i64-NEXT: mov v6.d[1], x18
; CHECK-i64-NEXT: ret
%a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x)
ret <16 x iXLen> %a
@@ -745,128 +734,114 @@ declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)
define <32 x iXLen> @lrint_v32f32(<32 x float> %x) nounwind {
; CHECK-i32-LABEL: lrint_v32f32:
; CHECK-i32: // %bb.0:
-; CHECK-i32-NEXT: str x27, [sp, #-80]! // 8-byte Folded Spill
+; CHECK-i32-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill
; CHECK-i32-NEXT: ptrue p1.d, vl2
; CHECK-i32-NEXT: // kill: def $q6 killed $q6 def $z6
; CHECK-i32-NEXT: // kill: def $q7 killed $q7 def $z7
-; CHECK-i32-NEXT: // kill: def $q2 killed $q2 def $z2
; CHECK-i32-NEXT: // kill: def $q4 killed $q4 def $z4
-; CHECK-i32-NEXT: // kill: def $q3 killed $q3 def $z3
+; CHECK-i32-NEXT: // kill: def $q2 killed $q2 def $z2
+; CHECK-i32-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-i32-NEXT: // kill: def $q5 killed $q5 def $z5
+; CHECK-i32-NEXT: // kill: def $q3 killed $q3 def $z3
; CHECK-i32-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-i32-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-i32-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill
; CHECK-i32-NEXT: ptrue p0.s, vl8
-; CHECK-i32-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
; CHECK-i32-NEXT: splice z6.d, p1, z6.d, z7.d
-; CHECK-i32-NEXT: splice z2.d, p1, z2.d, z3.d
; CHECK-i32-NEXT: splice z4.d, p1, z4.d, z5.d
+; CHECK-i32-NEXT: splice z2.d, p1, z2.d, z3.d
; CHECK-i32-NEXT: splice z0.d, p1, z0.d, z1.d
-; CHECK-i32-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill
-; CHECK-i32-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-i32-NEXT: movprfx z3, z6
-; CHECK-i32-NEXT: frintx z3.s, p0/m, z6.s
-; CHECK-i32-NEXT: frintx z2.s, p0/m, z2.s
-; CHECK-i32-NEXT: movprfx z1, z4
-; CHECK-i32-NEXT: frintx z1.s, p0/m, z4.s
-; CHECK-i32-NEXT: frintx z0.s, p0/m, z0.s
-; CHECK-i32-NEXT: mov z4.s, z3.s[7]
-; CHECK-i32-NEXT: mov z5.s, z3.s[6]
-; CHECK-i32-NEXT: mov z6.s, z3.s[5]
-; CHECK-i32-NEXT: mov z16.s, z1.s[7]
-; CHECK-i32-NEXT: mov z7.s, z3.s[4]
-; CHECK-i32-NEXT: mov z17.s, z1.s[6]
-; CHECK-i32-NEXT: mov z18.s, z1.s[5]
-; CHECK-i32-NEXT: mov z19.s, z1.s[4]
-; CHECK-i32-NEXT: fcvtzs w7, s3
-; CHECK-i32-NEXT: fcvtzs w8, s4
-; CHECK-i32-NEXT: mov z4.s, z2.s[7]
-; CHECK-i32-NEXT: fcvtzs w10, s5
-; CHECK-i32-NEXT: mov z5.s, z2.s[6]
-; CHECK-i32-NEXT: fcvtzs w13, s6
-; CHECK-i32-NEXT: fcvtzs w9, s16
-; CHECK-i32-NEXT: mov z6.s, z2.s[4]
-; CHECK-i32-NEXT: mov z16.s, z0.s[6]
-; CHECK-i32-NEXT: fcvtzs w14, s7
-; CHECK-i32-NEXT: fcvtzs w11, s4
-; CHECK-i32-NEXT: mov z4.s, z2.s[5]
-; CHECK-i32-NEXT: mov z7.s, z0.s[7]
-; CHECK-i32-NEXT: fcvtzs w16, s5
-; CHECK-i32-NEXT: mov z5.s, z0.s[4]
-; CHECK-i32-NEXT: fcvtzs w12, s17
-; CHECK-i32-NEXT: fcvtzs w15, s18
-; CHECK-i32-NEXT: fcvtzs w17, s19
-; CHECK-i32-NEXT: mov z17.s, z0.s[5]
-; CHECK-i32-NEXT: fcvtzs w3, s4
-; CHECK-i32-NEXT: mov z4.s, z3.s[1]
-; CHECK-i32-NEXT: mov z18.s, z3.s[2]
-; CHECK-i32-NEXT: fcvtzs w4, s6
-; CHECK-i32-NEXT: fcvtzs w0, s16
-; CHECK-i32-NEXT: fcvtzs w6, s5
-; CHECK-i32-NEXT: mov z16.s, z3.s[3]
-; CHECK-i32-NEXT: mov z3.s, z0.s[1]
-; CHECK-i32-NEXT: mov z5.s, z1.s[1]
-; CHECK-i32-NEXT: mov z6.s, z2.s[1]
-; CHECK-i32-NEXT: fcvtzs w21, s1
-; CHECK-i32-NEXT: fcvtzs w22, s0
-; CHECK-i32-NEXT: fcvtzs w23, s2
-; CHECK-i32-NEXT: fcvtzs w18, s7
-; CHECK-i32-NEXT: fcvtzs w2, s4
-; CHECK-i32-NEXT: mov z4.s, z1.s[2]
-; CHECK-i32-NEXT: mov z7.s, z2.s[2]
-; CHECK-i32-NEXT: fcvtzs w5, s17
-; CHECK-i32-NEXT: fcvtzs w24, s3
-; CHECK-i32-NEXT: fcvtzs w25, s5
-; CHECK-i32-NEXT: fcvtzs w26, s6
-; CHECK-i32-NEXT: fcvtzs w1, s18
-; CHECK-i32-NEXT: mov z18.s, z0.s[2]
-; CHECK-i32-NEXT: mov z17.s, z1.s[3]
-; CHECK-i32-NEXT: fcvtzs w19, s4
-; CHECK-i32-NEXT: mov z19.s, z2.s[3]
-; CHECK-i32-NEXT: fcvtzs w20, s7
-; CHECK-i32-NEXT: mov z20.s, z0.s[3]
-; CHECK-i32-NEXT: fmov s0, w22
-; CHECK-i32-NEXT: fmov s2, w23
-; CHECK-i32-NEXT: fmov s4, w21
-; CHECK-i32-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-i32-NEXT: fmov s1, w6
-; CHECK-i32-NEXT: fmov s6, w7
-; CHECK-i32-NEXT: fmov s3, w4
-; CHECK-i32-NEXT: fmov s5, w17
-; CHECK-i32-NEXT: fmov s7, w14
-; CHECK-i32-NEXT: fcvtzs w27, s18
-; CHECK-i32-NEXT: mov v0.s[1], w24
-; CHECK-i32-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload
-; CHECK-i32-NEXT: mov v2.s[1], w26
-; CHECK-i32-NEXT: mov v4.s[1], w25
-; CHECK-i32-NEXT: mov v1.s[1], w5
-; CHECK-i32-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload
-; CHECK-i32-NEXT: mov v3.s[1], w3
-; CHECK-i32-NEXT: mov v6.s[1], w2
-; CHECK-i32-NEXT: mov v5.s[1], w15
+; CHECK-i32-NEXT: movprfx z16, z6
+; CHECK-i32-NEXT: frintx z16.s, p0/m, z6.s
+; CHECK-i32-NEXT: movprfx z17, z4
+; CHECK-i32-NEXT: frintx z17.s, p0/m, z4.s
+; CHECK-i32-NEXT: movprfx z18, z2
+; CHECK-i32-NEXT: frintx z18.s, p0/m, z2.s
+; CHECK-i32-NEXT: movprfx z19, z0
+; CHECK-i32-NEXT: frintx z19.s, p0/m, z0.s
+; CHECK-i32-NEXT: mov z0.s, z16.s[7]
+; CHECK-i32-NEXT: mov z2.s, z16.s[5]
+; CHECK-i32-NEXT: mov z3.s, z16.s[4]
+; CHECK-i32-NEXT: mov z1.s, z16.s[6]
+; CHECK-i32-NEXT: mov z4.s, z17.s[7]
+; CHECK-i32-NEXT: mov z6.s, z17.s[6]
+; CHECK-i32-NEXT: mov z20.s, z17.s[5]
+; CHECK-i32-NEXT: mov z5.s, z17.s[4]
+; CHECK-i32-NEXT: mov z21.s, z19.s[1]
+; CHECK-i32-NEXT: fcvtzs w8, s0
+; CHECK-i32-NEXT: mov z0.s, z18.s[7]
+; CHECK-i32-NEXT: fcvtzs w13, s2
+; CHECK-i32-NEXT: mov z2.s, z18.s[5]
+; CHECK-i32-NEXT: fcvtzs s7, s3
+; CHECK-i32-NEXT: mov z3.s, z19.s[7]
+; CHECK-i32-NEXT: fcvtzs w10, s1
+; CHECK-i32-NEXT: mov z1.s, z18.s[6]
+; CHECK-i32-NEXT: fcvtzs w9, s4
+; CHECK-i32-NEXT: fcvtzs w12, s6
+; CHECK-i32-NEXT: fcvtzs w11, s0
+; CHECK-i32-NEXT: mov z0.s, z19.s[6]
+; CHECK-i32-NEXT: mov z4.s, z19.s[5]
+; CHECK-i32-NEXT: fcvtzs w18, s2
+; CHECK-i32-NEXT: mov z2.s, z18.s[4]
+; CHECK-i32-NEXT: fcvtzs w15, s3
+; CHECK-i32-NEXT: mov z3.s, z16.s[1]
+; CHECK-i32-NEXT: mov z6.s, z17.s[1]
+; CHECK-i32-NEXT: fcvtzs w14, s1
+; CHECK-i32-NEXT: mov z1.s, z16.s[2]
+; CHECK-i32-NEXT: fcvtzs w17, s0
+; CHECK-i32-NEXT: fcvtzs w1, s4
+; CHECK-i32-NEXT: mov z0.s, z17.s[2]
+; CHECK-i32-NEXT: mov z4.s, z19.s[4]
+; CHECK-i32-NEXT: fcvtzs w2, s3
+; CHECK-i32-NEXT: fcvtzs s3, s2
+; CHECK-i32-NEXT: mov z2.s, z18.s[1]
+; CHECK-i32-NEXT: fcvtzs w6, s6
+; CHECK-i32-NEXT: mov z6.s, z19.s[2]
+; CHECK-i32-NEXT: fcvtzs w16, s20
+; CHECK-i32-NEXT: fcvtzs w0, s1
+; CHECK-i32-NEXT: fcvtzs w3, s0
+; CHECK-i32-NEXT: fcvtzs s1, s4
+; CHECK-i32-NEXT: fcvtzs w7, s21
+; CHECK-i32-NEXT: fcvtzs s0, s19
+; CHECK-i32-NEXT: fcvtzs s4, s17
+; CHECK-i32-NEXT: fcvtzs w19, s2
+; CHECK-i32-NEXT: fcvtzs s2, s18
+; CHECK-i32-NEXT: fcvtzs s5, s5
+; CHECK-i32-NEXT: fcvtzs w5, s6
+; CHECK-i32-NEXT: fcvtzs s6, s16
+; CHECK-i32-NEXT: mov z20.s, z18.s[2]
+; CHECK-i32-NEXT: mov v1.s[1], w1
+; CHECK-i32-NEXT: mov v3.s[1], w18
; CHECK-i32-NEXT: mov v7.s[1], w13
+; CHECK-i32-NEXT: mov v0.s[1], w7
+; CHECK-i32-NEXT: mov v4.s[1], w6
+; CHECK-i32-NEXT: mov z16.s, z16.s[3]
+; CHECK-i32-NEXT: fcvtzs w4, s20
+; CHECK-i32-NEXT: mov v2.s[1], w19
+; CHECK-i32-NEXT: mov v5.s[1], w16
+; CHECK-i32-NEXT: mov v6.s[1], w2
+; CHECK-i32-NEXT: mov z17.s, z17.s[3]
+; CHECK-i32-NEXT: mov z18.s, z18.s[3]
+; CHECK-i32-NEXT: mov z19.s, z19.s[3]
; CHECK-i32-NEXT: fcvtzs w13, s16
-; CHECK-i32-NEXT: fcvtzs w14, s17
-; CHECK-i32-NEXT: fcvtzs w15, s19
-; CHECK-i32-NEXT: fcvtzs w17, s20
-; CHECK-i32-NEXT: mov v0.s[2], w27
-; CHECK-i32-NEXT: mov v1.s[2], w0
-; CHECK-i32-NEXT: mov v2.s[2], w20
-; CHECK-i32-NEXT: mov v4.s[2], w19
-; CHECK-i32-NEXT: mov v3.s[2], w16
-; CHECK-i32-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-i32-NEXT: mov v6.s[2], w1
+; CHECK-i32-NEXT: mov v1.s[2], w17
+; CHECK-i32-NEXT: mov v0.s[2], w5
+; CHECK-i32-NEXT: mov v4.s[2], w3
+; CHECK-i32-NEXT: mov v3.s[2], w14
+; CHECK-i32-NEXT: fcvtzs w16, s17
+; CHECK-i32-NEXT: fcvtzs w18, s18
+; CHECK-i32-NEXT: mov v2.s[2], w4
+; CHECK-i32-NEXT: fcvtzs w1, s19
+; CHECK-i32-NEXT: mov v6.s[2], w0
; CHECK-i32-NEXT: mov v5.s[2], w12
; CHECK-i32-NEXT: mov v7.s[2], w10
-; CHECK-i32-NEXT: mov v0.s[3], w17
-; CHECK-i32-NEXT: mov v1.s[3], w18
-; CHECK-i32-NEXT: mov v2.s[3], w15
-; CHECK-i32-NEXT: mov v4.s[3], w14
+; CHECK-i32-NEXT: mov v1.s[3], w15
; CHECK-i32-NEXT: mov v3.s[3], w11
+; CHECK-i32-NEXT: mov v2.s[3], w18
+; CHECK-i32-NEXT: mov v4.s[3], w16
+; CHECK-i32-NEXT: mov v0.s[3], w1
; CHECK-i32-NEXT: mov v6.s[3], w13
; CHECK-i32-NEXT: mov v5.s[3], w9
; CHECK-i32-NEXT: mov v7.s[3], w8
-; CHECK-i32-NEXT: ldr x27, [sp], #80 // 8-byte Folded Reload
+; CHECK-i32-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload
; CHECK-i32-NEXT: ret
;
; CHECK-i64-LABEL: lrint_v32f32:
@@ -1006,8 +981,7 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) nounwind {
; CHECK-i64-LABEL: lrint_v1f64:
; CHECK-i64: // %bb.0:
; CHECK-i64-NEXT: frintx d0, d0
-; CHECK-i64-NEXT: fcvtzs x8, d0
-; CHECK-i64-NEXT: fmov d0, x8
+; CHECK-i64-NEXT: fcvtzs d0, d0
; CHECK-i64-NEXT: ret
%a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x)
ret <1 x iXLen> %a
@@ -1067,17 +1041,15 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) nounwind {
; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d
; CHECK-i64-NEXT: ptrue p0.d, vl4
; CHECK-i64-NEXT: frintx z0.d, p0/m, z0.d
-; CHECK-i64-NEXT: mov z1.d, z0.d[2]
-; CHECK-i64-NEXT: mov z2.d, z0.d[3]
+; CHECK-i64-NEXT: mov z1.d, z0.d[3]
+; CHECK-i64-NEXT: mov z2.d, z0.d[2]
; CHECK-i64-NEXT: mov z3.d, z0.d[1]
-; CHECK-i64-NEXT: fcvtzs x9, d0
+; CHECK-i64-NEXT: fcvtzs d0, d0
; CHECK-i64-NEXT: fcvtzs x8, d1
-; CHECK-i64-NEXT: fcvtzs x10, d2
-; CHECK-i64-NEXT: fcvtzs x11, d3
-; CHECK-i64-NEXT: fmov d0, x9
-; CHECK-i64-NEXT: fmov d1, x8
-; CHECK-i64-NEXT: mov v0.d[1], x11
-; CHECK-i64-NEXT: mov v1.d[1], x10
+; CHECK-i64-NEXT: fcvtzs d1, d2
+; CHECK-i64-NEXT: fcvtzs x9, d3
+; CHECK-i64-NEXT: mov v0.d[1], x9
+; CHECK-i64-NEXT: mov v1.d[1], x8
; CHECK-i64-NEXT: ret
%a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x)
ret <4 x iXLen> %a
@@ -1132,31 +1104,27 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) nounwind {
; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d
; CHECK-i64-NEXT: splice z2.d, p0, z2.d, z3.d
; CHECK-i64-NEXT: ptrue p0.d, vl4
-; CHECK-i64-NEXT: frintx z0.d, p0/m, z0.d
; CHECK-i64-NEXT: movprfx z1, z2
; CHECK-i64-NEXT: frintx z1.d, p0/m, z2.d
-; CHECK-i64-NEXT: mov z4.d, z1.d[2]
-; CHECK-i64-NEXT: mov z5.d, z0.d[2]
-; CHECK-i64-NEXT: mov z2.d, z0.d[1]
-; CHECK-i64-NEXT: mov z3.d, z1.d[3]
-; CHECK-i64-NEXT: mov z6.d, z0.d[3]
-; CHECK-i64-NEXT: fcvtzs x8, d0
-; CHECK-i64-NEXT: mov z0.d, z1.d[1]
-; CHECK-i64-NEXT: fcvtzs x10, d1
-; CHECK-i64-NEXT: fcvtzs x11, d4
-; CHECK-i64-NEXT: fcvtzs x12, d5
-; CHECK-i64-NEXT: fcvtzs x9, d2
-; CHECK-i64-NEXT: fcvtzs x13, d3
-; CHECK-i64-NEXT: fcvtzs x14, d6
-; CHECK-i64-NEXT: fcvtzs x15, d0
-; CHECK-i64-NEXT: fmov d0, x8
-; CHECK-i64-NEXT: fmov d2, x10
-; CHECK-i64-NEXT: fmov d1, x12
-; CHECK-i64-NEXT: fmov d3, x11
-; CHECK-i64-NEXT: mov v0.d[1], x9
-; CHECK-i64-NEXT: mov v2.d[1], x15
-; CHECK-i64-NEXT: mov v1.d[1], x14
-; CHECK-i64-NEXT: mov v3.d[1], x13
+; CHECK-i64-NEXT: frintx z0.d, p0/m, z0.d
+; CHECK-i64-NEXT: mov z2.d, z1.d[3]
+; CHECK-i64-NEXT: mov z3.d, z0.d[3]
+; CHECK-i64-NEXT: mov z4.d, z0.d[1]
+; CHECK-i64-NEXT: mov z5.d, z1.d[2]
+; CHECK-i64-NEXT: mov z6.d, z0.d[2]
+; CHECK-i64-NEXT: mov z7.d, z1.d[1]
+; CHECK-i64-NEXT: fcvtzs d0, d0
+; CHECK-i64-NEXT: fcvtzs x8, d2
+; CHECK-i64-NEXT: fcvtzs x9, d3
+; CHECK-i64-NEXT: fcvtzs x10, d4
+; CHECK-i64-NEXT: fcvtzs d2, d1
+; CHECK-i64-NEXT: fcvtzs d3, d5
+; CHECK-i64-NEXT: fcvtzs d1, d6
+; CHECK-i64-NEXT: fcvtzs x11, d7
+; CHECK-i64-NEXT: mov v0.d[1], x10
+; CHECK-i64-NEXT: mov v1.d[1], x9
+; CHECK-i64-NEXT: mov v3.d[1], x8
+; CHECK-i64-NEXT: mov v2.d[1], x11
; CHECK-i64-NEXT: ret
%a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x)
ret <8 x iXLen> %a
@@ -1234,70 +1202,60 @@ define <16 x iXLen> @lrint_v16f64(<16 x double> %x) nounwind {
;
; CHECK-i64-LABEL: lrint_v16f64:
; CHECK-i64: // %bb.0:
-; CHECK-i64-NEXT: ptrue p1.d, vl2
+; CHECK-i64-NEXT: ptrue p0.d, vl2
; CHECK-i64-NEXT: // kill: def $q6 killed $q6 def $z6
; CHECK-i64-NEXT: // kill: def $q4 killed $q4 def $z4
+; CHECK-i64-NEXT: // kill: def $q2 killed $q2 def $z2
; CHECK-i64-NEXT: // kill: def $q7 killed $q7 def $z7
; CHECK-i64-NEXT: // kill: def $q5 killed $q5 def $z5
-; CHECK-i64-NEXT: // kill: def $q2 killed $q2 def $z2
-; CHECK-i64-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-i64-NEXT: // kill: def $q3 killed $q3 def $z3
+; CHECK-i64-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-i64-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-i64-NEXT: splice z6.d, p0, z6.d, z7.d
+; CHECK-i64-NEXT: splice z2.d, p0, z2.d, z3.d
+; CHECK-i64-NEXT: splice z4.d, p0, z4.d, z5.d
+; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d
; CHECK-i64-NEXT: ptrue p0.d, vl4
-; CHECK-i64-NEXT: splice z6.d, p1, z6.d, z7.d
-; CHECK-i64-NEXT: splice z4.d, p1, z4.d, z5.d
-; CHECK-i64-NEXT: splice z2.d, p1, z2.d, z3.d
-; CHECK-i64-NEXT: splice z0.d, p1, z0.d, z1.d
-; CHECK-i64-NEXT: movprfx z3, z6
-; CHECK-i64-NEXT: frintx z3.d, p0/m, z6.d
-; CHECK-i64-NEXT: movprfx z1, z4
-; CHECK-i64-NEXT: frintx z1.d, p0/m, z4.d
+; CHECK-i64-NEXT: frintx z6.d, p0/m, z6.d
+; CHECK-i64-NEXT: frintx z4.d, p0/m, z4.d
; CHECK-i64-NEXT: frintx z2.d, p0/m, z2.d
; CHECK-i64-NEXT: frintx z0.d, p0/m, z0.d
-; CHECK-i64-NEXT: mov z4.d, z3.d[2]
-; CHECK-i64-NEXT: mov z5.d, z1.d[2]
-; CHECK-i64-NEXT: mov z6.d, z2.d[3]
-; CHECK-i64-NEXT: fcvtzs x11, d0
-; CHECK-i64-NEXT: fcvtzs x12, d1
-; CHECK-i64-NEXT: fcvtzs x13, d2
-; CHECK-i64-NEXT: fcvtzs x14, d3
-; CHECK-i64-NEXT: mov z7.d, z3.d[3]
-; CHECK-i64-NEXT: mov z16.d, z1.d[3]
-; CHECK-i64-NEXT: fcvtzs x9, d4
+; CHECK-i64-NEXT: mov z1.d, z6.d[3]
+; CHECK-i64-NEXT: mov z3.d, z4.d[3]
+; CHECK-i64-NEXT: mov z5.d, z2.d[3]
+; CHECK-i64-NEXT: mov z16.d, z4.d[1]
+; CHECK-i64-NEXT: mov z7.d, z0.d[3]
+; CHECK-i64-NEXT: mov z17.d, z0.d[2]
+; CHECK-i64-NEXT: mov z18.d, z4.d[2]
+; CHECK-i64-NEXT: mov z19.d, z6.d[1]
+; CHECK-i64-NEXT: fcvtzs d4, d4
+; CHECK-i64-NEXT: fcvtzs x8, d1
+; CHECK-i64-NEXT: mov z1.d, z2.d[1]
+; CHECK-i64-NEXT: fcvtzs x9, d3
+; CHECK-i64-NEXT: mov z3.d, z0.d[1]
; CHECK-i64-NEXT: fcvtzs x10, d5
-; CHECK-i64-NEXT: mov z4.d, z2.d[2]
-; CHECK-i64-NEXT: mov z5.d, z0.d[2]
-; CHECK-i64-NEXT: fcvtzs x8, d6
-; CHECK-i64-NEXT: mov z2.d, z2.d[1]
-; CHECK-i64-NEXT: mov z6.d, z0.d[3]
-; CHECK-i64-NEXT: mov z1.d, z1.d[1]
-; CHECK-i64-NEXT: mov z3.d, z3.d[1]
-; CHECK-i64-NEXT: fcvtzs x15, d4
-; CHECK-i64-NEXT: mov z4.d, z0.d[1]
-; CHECK-i64-NEXT: fmov d0, x11
-; CHECK-i64-NEXT: fcvtzs x16, d5
-; CHECK-i64-NEXT: fcvtzs x11, d2
-; CHECK-i64-NEXT: fmov d2, x13
-; CHECK-i64-NEXT: fcvtzs x17, d7
-; CHECK-i64-NEXT: fcvtzs x18, d16
-; CHECK-i64-NEXT: fcvtzs x0, d3
-; CHECK-i64-NEXT: fcvtzs x13, d4
-; CHECK-i64-NEXT: fmov d4, x12
-; CHECK-i64-NEXT: fcvtzs x12, d6
-; CHECK-i64-NEXT: fmov d6, x14
-; CHECK-i64-NEXT: fcvtzs x14, d1
-; CHECK-i64-NEXT: fmov d3, x15
-; CHECK-i64-NEXT: fmov d1, x16
-; CHECK-i64-NEXT: fmov d5, x10
-; CHECK-i64-NEXT: fmov d7, x9
-; CHECK-i64-NEXT: mov v2.d[1], x11
-; CHECK-i64-NEXT: mov v0.d[1], x13
-; CHECK-i64-NEXT: mov v3.d[1], x8
-; CHECK-i64-NEXT: mov v6.d[1], x0
-; CHECK-i64-NEXT: mov v4.d[1], x14
-; CHECK-i64-NEXT: mov v1.d[1], x12
-; CHECK-i64-NEXT: mov v5.d[1], x18
-; CHECK-i64-NEXT: mov v7.d[1], x17
+; CHECK-i64-NEXT: mov z5.d, z6.d[2]
+; CHECK-i64-NEXT: fcvtzs x12, d16
+; CHECK-i64-NEXT: mov z16.d, z2.d[2]
+; CHECK-i64-NEXT: fcvtzs x11, d7
+; CHECK-i64-NEXT: fcvtzs x13, d1
+; CHECK-i64-NEXT: fcvtzs d1, d17
+; CHECK-i64-NEXT: fcvtzs d0, d0
+; CHECK-i64-NEXT: fcvtzs x14, d3
+; CHECK-i64-NEXT: fcvtzs d7, d5
+; CHECK-i64-NEXT: fcvtzs d2, d2
+; CHECK-i64-NEXT: fcvtzs d3, d16
+; CHECK-i64-NEXT: fcvtzs d5, d18
+; CHECK-i64-NEXT: fcvtzs x15, d19
+; CHECK-i64-NEXT: fcvtzs d6, d6
+; CHECK-i64-NEXT: mov v4.d[1], x12
+; CHECK-i64-NEXT: mov v1.d[1], x11
+; CHECK-i64-NEXT: mov v0.d[1], x14
+; CHECK-i64-NEXT: mov v2.d[1], x13
+; CHECK-i64-NEXT: mov v7.d[1], x8
+; CHECK-i64-NEXT: mov v3.d[1], x10
+; CHECK-i64-NEXT: mov v5.d[1], x9
+; CHECK-i64-NEXT: mov v6.d[1], x15
; CHECK-i64-NEXT: ret
%a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double> %x)
ret <16 x iXLen> %a
@@ -1723,13 +1681,13 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) nounwind {
; CHECK-i32-NEXT: sub sp, sp, #176
; CHECK-i32-NEXT: str q0, [sp, #96] // 16-byte Spill
; CHECK-i32-NEXT: mov v0.16b, v7.16b
-; CHECK-i32-NEXT: stp x30, x25, [sp, #112] // 16-byte Folded Spill
+; CHECK-i32-NEXT: str x30, [sp, #112] // 8-byte Spill
; CHECK-i32-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill
; CHECK-i32-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill
; CHECK-i32-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill
; CHECK-i32-NEXT: stp q6, q5, [sp] // 32-byte Folded Spill
-; CHECK-i32-NEXT: stp q4, q3, [sp, #32] // 32-byte Folded Spill
-; CHECK-i32-NEXT: stp q2, q1, [sp, #64] // 32-byte Folded Spill
+; CHECK-i32-NEXT: stp q3, q2, [sp, #32] // 32-byte Folded Spill
+; CHECK-i32-NEXT: stp q1, q4, [sp, #64] // 32-byte Folded Spill
; CHECK-i32-NEXT: bl lrintl
; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Reload
; CHECK-i32-NEXT: mov w19, w0
@@ -1749,21 +1707,22 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) nounwind {
; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Reload
; CHECK-i32-NEXT: mov w24, w0
; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s1, w0
; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Reload
-; CHECK-i32-NEXT: mov w25, w0
+; CHECK-i32-NEXT: str q1, [sp, #96] // 16-byte Spill
; CHECK-i32-NEXT: bl lrintl
-; CHECK-i32-NEXT: fmov s1, w22
; CHECK-i32-NEXT: fmov s0, w0
-; CHECK-i32-NEXT: mov v0.s[1], w25
+; CHECK-i32-NEXT: ldr q1, [sp, #96] // 16-byte Reload
+; CHECK-i32-NEXT: ldr x30, [sp, #112] // 8-byte Reload
; CHECK-i32-NEXT: mov v1.s[1], w21
-; CHECK-i32-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload
-; CHECK-i32-NEXT: ldp x30, x25, [sp, #112] // 16-byte Folded Reload
-; CHECK-i32-NEXT: mov v0.s[2], w24
+; CHECK-i32-NEXT: mov v0.s[1], w24
; CHECK-i32-NEXT: mov v1.s[2], w20
-; CHECK-i32-NEXT: mov v0.s[3], w23
+; CHECK-i32-NEXT: mov v0.s[2], w23
+; CHECK-i32-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload
; CHECK-i32-NEXT: mov v1.s[3], w19
; CHECK-i32-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload
-; CHECK-i32-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[3], w22
+; CHECK-i32-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload
; CHECK-i32-NEXT: add sp, sp, #176
; CHECK-i32-NEXT: ret
;
@@ -1847,11 +1806,10 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) nounwind {
; CHECK-i32-LABEL: lrint_v16fp128:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: sub sp, sp, #368
-; CHECK-i32-NEXT: stp q3, q0, [sp, #144] // 32-byte Folded Spill
-; CHECK-i32-NEXT: stp q2, q1, [sp, #176] // 32-byte Folded Spill
+; CHECK-i32-NEXT: stp q2, q1, [sp, #128] // 32-byte Folded Spill
; CHECK-i32-NEXT: ldr q1, [sp, #368]
; CHECK-i32-NEXT: stp x29, x30, [sp, #272] // 16-byte Folded Spill
-; CHECK-i32-NEXT: str q1, [sp, #64] // 16-byte Spill
+; CHECK-i32-NEXT: str q1, [sp, #160] // 16-byte Spill
; CHECK-i32-NEXT: ldr q1, [sp, #384]
; CHECK-i32-NEXT: stp x28, x27, [sp, #288] // 16-byte Folded Spill
; CHECK-i32-NEXT: str q1, [sp, #48] // 16-byte Spill
@@ -1860,43 +1818,40 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) nounwind {
; CHECK-i32-NEXT: str q1, [sp, #32] // 16-byte Spill
; CHECK-i32-NEXT: ldr q1, [sp, #416]
; CHECK-i32-NEXT: stp x24, x23, [sp, #320] // 16-byte Folded Spill
-; CHECK-i32-NEXT: str q1, [sp, #208] // 16-byte Spill
-; CHECK-i32-NEXT: ldr q1, [sp, #432]
; CHECK-i32-NEXT: stp x22, x21, [sp, #336] // 16-byte Folded Spill
-; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Spill
-; CHECK-i32-NEXT: ldr q1, [sp, #448]
; CHECK-i32-NEXT: stp x20, x19, [sp, #352] // 16-byte Folded Spill
-; CHECK-i32-NEXT: str q1, [sp, #224] // 16-byte Spill
+; CHECK-i32-NEXT: stp q7, q6, [sp, #64] // 32-byte Folded Spill
+; CHECK-i32-NEXT: stp q5, q3, [sp, #96] // 32-byte Folded Spill
+; CHECK-i32-NEXT: stp q0, q1, [sp, #208] // 32-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #432]
+; CHECK-i32-NEXT: stp q1, q4, [sp, #176] // 32-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #448]
+; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Spill
; CHECK-i32-NEXT: ldr q1, [sp, #464]
-; CHECK-i32-NEXT: stp q7, q6, [sp, #80] // 32-byte Folded Spill
; CHECK-i32-NEXT: str q1, [sp, #240] // 16-byte Spill
; CHECK-i32-NEXT: ldr q1, [sp, #480]
-; CHECK-i32-NEXT: stp q5, q4, [sp, #112] // 32-byte Folded Spill
; CHECK-i32-NEXT: mov v0.16b, v1.16b
; CHECK-i32-NEXT: bl lrintl
; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Reload
; CHECK-i32-NEXT: str w0, [sp, #268] // 4-byte Spill
; CHECK-i32-NEXT: bl lrintl
-; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Reload
-; CHECK-i32-NEXT: str w0, [sp, #240] // 4-byte Spill
-; CHECK-i32-NEXT: bl lrintl
; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Reload
-; CHECK-i32-NEXT: str w0, [sp, #224] // 4-byte Spill
+; CHECK-i32-NEXT: str w0, [sp, #240] // 4-byte Spill
; CHECK-i32-NEXT: bl lrintl
-; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Reload
-; CHECK-i32-NEXT: mov w23, w0
+; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Reload
+; CHECK-i32-NEXT: mov w22, w0
; CHECK-i32-NEXT: bl lrintl
; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Reload
-; CHECK-i32-NEXT: str w0, [sp, #208] // 4-byte Spill
+; CHECK-i32-NEXT: str w0, [sp, #224] // 4-byte Spill
; CHECK-i32-NEXT: bl lrintl
; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Reload
-; CHECK-i32-NEXT: mov w24, w0
+; CHECK-i32-NEXT: mov w23, w0
; CHECK-i32-NEXT: bl lrintl
; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Reload
; CHECK-i32-NEXT: mov w25, w0
; CHECK-i32-NEXT: bl lrintl
; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Reload
-; CHECK-i32-NEXT: mov w27, w0
+; CHECK-i32-NEXT: mov w24, w0
; CHECK-i32-NEXT: bl lrintl
; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Reload
; CHECK-i32-NEXT: mov w26, w0
@@ -1905,46 +1860,52 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) nounwind {
; CHECK-i32-NEXT: mov w28, w0
; CHECK-i32-NEXT: bl lrintl
; CHECK-i32-NEXT: ldr q0, [sp, #128] // 16-byte Reload
-; CHECK-i32-NEXT: mov w29, w0
+; CHECK-i32-NEXT: mov w27, w0
; CHECK-i32-NEXT: bl lrintl
; CHECK-i32-NEXT: ldr q0, [sp, #144] // 16-byte Reload
+; CHECK-i32-NEXT: mov w29, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Reload
; CHECK-i32-NEXT: mov w19, w0
; CHECK-i32-NEXT: bl lrintl
-; CHECK-i32-NEXT: ldr q0, [sp, #176] // 16-byte Reload
+; CHECK-i32-NEXT: ldr q0, [sp, #160] // 16-byte Reload
; CHECK-i32-NEXT: mov w20, w0
; CHECK-i32-NEXT: bl lrintl
-; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Reload
+; CHECK-i32-NEXT: ldr q0, [sp, #176] // 16-byte Reload
; CHECK-i32-NEXT: mov w21, w0
; CHECK-i32-NEXT: bl lrintl
-; CHECK-i32-NEXT: ldr q0, [sp, #160] // 16-byte Reload
-; CHECK-i32-NEXT: mov w22, w0
+; CHECK-i32-NEXT: fmov s1, w0
+; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Reload
+; CHECK-i32-NEXT: str q1, [sp, #208] // 16-byte Spill
+; CHECK-i32-NEXT: fmov s1, w21
+; CHECK-i32-NEXT: str q1, [sp, #192] // 16-byte Spill
+; CHECK-i32-NEXT: fmov s1, w20
+; CHECK-i32-NEXT: str q1, [sp, #176] // 16-byte Spill
; CHECK-i32-NEXT: bl lrintl
-; CHECK-i32-NEXT: fmov s1, w19
; CHECK-i32-NEXT: fmov s0, w0
-; CHECK-i32-NEXT: ldr w8, [sp, #224] // 4-byte Reload
-; CHECK-i32-NEXT: fmov s2, w27
-; CHECK-i32-NEXT: fmov s3, w23
-; CHECK-i32-NEXT: mov v0.s[1], w22
-; CHECK-i32-NEXT: mov v1.s[1], w29
-; CHECK-i32-NEXT: mov v2.s[1], w25
-; CHECK-i32-NEXT: mov v3.s[1], w8
+; CHECK-i32-NEXT: ldp q1, q2, [sp, #176] // 32-byte Folded Reload
+; CHECK-i32-NEXT: ldr q3, [sp, #208] // 16-byte Reload
; CHECK-i32-NEXT: ldr w8, [sp, #240] // 4-byte Reload
-; CHECK-i32-NEXT: ldp x29, x30, [sp, #272] // 16-byte Folded Reload
-; CHECK-i32-NEXT: mov v0.s[2], w21
-; CHECK-i32-NEXT: mov v1.s[2], w28
-; CHECK-i32-NEXT: mov v2.s[2], w24
-; CHECK-i32-NEXT: mov v3.s[2], w8
-; CHECK-i32-NEXT: ldr w8, [sp, #208] // 4-byte Reload
+; CHECK-i32-NEXT: mov v0.s[1], w19
+; CHECK-i32-NEXT: mov v1.s[1], w28
+; CHECK-i32-NEXT: mov v2.s[1], w25
+; CHECK-i32-NEXT: mov v3.s[1], w22
+; CHECK-i32-NEXT: ldp x20, x19, [sp, #352] // 16-byte Folded Reload
; CHECK-i32-NEXT: ldp x22, x21, [sp, #336] // 16-byte Folded Reload
-; CHECK-i32-NEXT: ldp x24, x23, [sp, #320] // 16-byte Folded Reload
-; CHECK-i32-NEXT: mov v0.s[3], w20
-; CHECK-i32-NEXT: mov v1.s[3], w26
+; CHECK-i32-NEXT: mov v0.s[2], w29
+; CHECK-i32-NEXT: mov v1.s[2], w26
+; CHECK-i32-NEXT: mov v2.s[2], w23
+; CHECK-i32-NEXT: mov v3.s[2], w8
+; CHECK-i32-NEXT: ldr w8, [sp, #224] // 4-byte Reload
+; CHECK-i32-NEXT: ldp x26, x25, [sp, #304] // 16-byte Folded Reload
+; CHECK-i32-NEXT: ldp x29, x30, [sp, #272] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[3], w27
+; CHECK-i32-NEXT: mov v1.s[3], w24
; CHECK-i32-NEXT: mov v2.s[3], w8
; CHECK-i32-NEXT: ldr w8, [sp, #268] // 4-byte Reload
-; CHECK-i32-NEXT: ldp x20, x19, [sp, #352] // 16-byte Folded Reload
-; CHECK-i32-NEXT: ldp x26, x25, [sp, #304] // 16-byte Folded Reload
-; CHECK-i32-NEXT: mov v3.s[3], w8
+; CHECK-i32-NEXT: ldp x24, x23, [sp, #320] // 16-byte Folded Reload
; CHECK-i32-NEXT: ldp x28, x27, [sp, #288] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v3.s[3], w8
; CHECK-i32-NEXT: add sp, sp, #368
; CHECK-i32-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll
index ae7617d9c0b66..d9a9e57fe0a63 100644
--- a/llvm/test/CodeGen/AArch64/vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll
@@ -806,18 +806,11 @@ define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) nounwind {
declare <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float>)
define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) nounwind {
-; CHECK-SD-LABEL: llrint_v1i64_v1f64:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: frintx d0, d0
-; CHECK-SD-NEXT: fcvtzs x8, d0
-; CHECK-SD-NEXT: fmov d0, x8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: llrint_v1i64_v1f64:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: frintx d0, d0
-; CHECK-GI-NEXT: fcvtzs d0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: llrint_v1i64_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintx d0, d0
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: ret
%a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x)
ret <1 x i64> %a
}
diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll
index 9eaad687fb4a2..2abe0b7ae2106 100644
--- a/llvm/test/CodeGen/AArch64/vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll
@@ -11,13 +11,12 @@
; RUN: FileCheck %s --check-prefixes=CHECK-i64,CHECK-i64-GI
define <1 x iXLen> @lrint_v1f16(<1 x half> %x) nounwind {
-; CHECK-i32-SD-LABEL: lrint_v1f16:
-; CHECK-i32-SD: // %bb.0:
-; CHECK-i32-SD-NEXT: fcvt s0, h0
-; CHECK-i32-SD-NEXT: frintx s0, s0
-; CHECK-i32-SD-NEXT: fcvtzs w8, s0
-; CHECK-i32-SD-NEXT: fmov s0, w8
-; CHECK-i32-SD-NEXT: ret
+; CHECK-i32-LABEL: lrint_v1f16:
+; CHECK-i32: // %bb.0:
+; CHECK-i32-NEXT: fcvt s0, h0
+; CHECK-i32-NEXT: frintx s0, s0
+; CHECK-i32-NEXT: fcvtzs s0, s0
+; CHECK-i32-NEXT: ret
;
; CHECK-i64-LABEL: lrint_v1f16:
; CHECK-i64: // %bb.0:
@@ -26,13 +25,6 @@ define <1 x iXLen> @lrint_v1f16(<1 x half> %x) nounwind {
; CHECK-i64-NEXT: fcvtzs x8, s0
; CHECK-i64-NEXT: fmov d0, x8
; CHECK-i64-NEXT: ret
-;
-; CHECK-i32-GI-LABEL: lrint_v1f16:
-; CHECK-i32-GI: // %bb.0:
-; CHECK-i32-GI-NEXT: fcvt s0, h0
-; CHECK-i32-GI-NEXT: frintx s0, s0
-; CHECK-i32-GI-NEXT: fcvtzs s0, s0
-; CHECK-i32-GI-NEXT: ret
%a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
ret <1 x iXLen> %a
}
@@ -47,10 +39,9 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) nounwind {
; CHECK-i32-SD-NEXT: fcvt s1, h1
; CHECK-i32-SD-NEXT: frintx s0, s0
; CHECK-i32-SD-NEXT: frintx s1, s1
-; CHECK-i32-SD-NEXT: fcvtzs w8, s0
-; CHECK-i32-SD-NEXT: fcvtzs w9, s1
-; CHECK-i32-SD-NEXT: fmov s0, w8
-; CHECK-i32-SD-NEXT: mov v0.s[1], w9
+; CHECK-i32-SD-NEXT: fcvtzs s0, s0
+; CHECK-i32-SD-NEXT: fcvtzs w8, s1
+; CHECK-i32-SD-NEXT: mov v0.s[1], w8
; CHECK-i32-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-i32-SD-NEXT: ret
;
@@ -98,17 +89,16 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) nounwind {
; CHECK-i32-SD-NEXT: mov h1, v0.h[1]
; CHECK-i32-SD-NEXT: fcvt s2, h0
; CHECK-i32-SD-NEXT: mov h3, v0.h[2]
-; CHECK-i32-SD-NEXT: mov h0, v0.h[3]
+; CHECK-i32-SD-NEXT: mov h4, v0.h[3]
; CHECK-i32-SD-NEXT: fcvt s1, h1
-; CHECK-i32-SD-NEXT: frintx s2, s2
-; CHECK-i32-SD-NEXT: fcvt s3, h3
+; CHECK-i32-SD-NEXT: frintx s0, s2
+; CHECK-i32-SD-NEXT: fcvt s2, h3
; CHECK-i32-SD-NEXT: frintx s1, s1
-; CHECK-i32-SD-NEXT: fcvtzs w8, s2
-; CHECK-i32-SD-NEXT: fcvt s2, h0
-; CHECK-i32-SD-NEXT: fcvtzs w9, s1
-; CHECK-i32-SD-NEXT: frintx s1, s3
-; CHECK-i32-SD-NEXT: fmov s0, w8
-; CHECK-i32-SD-NEXT: mov v0.s[1], w9
+; CHECK-i32-SD-NEXT: fcvtzs s0, s0
+; CHECK-i32-SD-NEXT: fcvtzs w8, s1
+; CHECK-i32-SD-NEXT: frintx s1, s2
+; CHECK-i32-SD-NEXT: fcvt s2, h4
+; CHECK-i32-SD-NEXT: mov v0.s[1], w8
; CHECK-i32-SD-NEXT: fcvtzs w8, s1
; CHECK-i32-SD-NEXT: frintx s1, s2
; CHECK-i32-SD-NEXT: mov v0.s[2], w8
@@ -169,41 +159,39 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) nounwind {
; CHECK-i32-SD-LABEL: lrint_v8f16:
; CHECK-i32-SD: // %bb.0:
; CHECK-i32-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-i32-SD-NEXT: mov h3, v0.h[1]
-; CHECK-i32-SD-NEXT: fcvt s6, h0
+; CHECK-i32-SD-NEXT: mov h2, v0.h[1]
; CHECK-i32-SD-NEXT: mov h4, v0.h[2]
+; CHECK-i32-SD-NEXT: fcvt s7, h0
; CHECK-i32-SD-NEXT: mov h0, v0.h[3]
-; CHECK-i32-SD-NEXT: mov h2, v1.h[1]
+; CHECK-i32-SD-NEXT: mov h3, v1.h[1]
+; CHECK-i32-SD-NEXT: fcvt s2, h2
; CHECK-i32-SD-NEXT: fcvt s5, h1
-; CHECK-i32-SD-NEXT: mov h7, v1.h[2]
-; CHECK-i32-SD-NEXT: fcvt s3, h3
-; CHECK-i32-SD-NEXT: frintx s6, s6
+; CHECK-i32-SD-NEXT: mov h6, v1.h[2]
; CHECK-i32-SD-NEXT: fcvt s4, h4
-; CHECK-i32-SD-NEXT: mov h1, v1.h[3]
-; CHECK-i32-SD-NEXT: fcvt s2, h2
+; CHECK-i32-SD-NEXT: mov h16, v1.h[3]
+; CHECK-i32-SD-NEXT: frintx s7, s7
+; CHECK-i32-SD-NEXT: fcvt s3, h3
+; CHECK-i32-SD-NEXT: frintx s2, s2
; CHECK-i32-SD-NEXT: frintx s5, s5
-; CHECK-i32-SD-NEXT: fcvt s7, h7
+; CHECK-i32-SD-NEXT: fcvt s6, h6
; CHECK-i32-SD-NEXT: frintx s3, s3
-; CHECK-i32-SD-NEXT: fcvtzs w9, s6
-; CHECK-i32-SD-NEXT: frintx s4, s4
-; CHECK-i32-SD-NEXT: frintx s2, s2
-; CHECK-i32-SD-NEXT: fcvtzs w8, s5
-; CHECK-i32-SD-NEXT: fcvt s5, h1
-; CHECK-i32-SD-NEXT: fcvtzs w11, s3
-; CHECK-i32-SD-NEXT: fcvt s3, h0
-; CHECK-i32-SD-NEXT: fmov s0, w9
-; CHECK-i32-SD-NEXT: fcvtzs w12, s4
-; CHECK-i32-SD-NEXT: fcvtzs w10, s2
-; CHECK-i32-SD-NEXT: frintx s2, s7
-; CHECK-i32-SD-NEXT: fmov s1, w8
-; CHECK-i32-SD-NEXT: mov v0.s[1], w11
; CHECK-i32-SD-NEXT: fcvtzs w8, s2
-; CHECK-i32-SD-NEXT: mov v1.s[1], w10
-; CHECK-i32-SD-NEXT: frintx s2, s3
+; CHECK-i32-SD-NEXT: frintx s2, s4
+; CHECK-i32-SD-NEXT: fcvtzs s1, s5
+; CHECK-i32-SD-NEXT: fcvt s4, h0
+; CHECK-i32-SD-NEXT: fcvt s5, h16
+; CHECK-i32-SD-NEXT: fcvtzs s0, s7
+; CHECK-i32-SD-NEXT: fcvtzs w9, s3
+; CHECK-i32-SD-NEXT: frintx s3, s6
+; CHECK-i32-SD-NEXT: fcvtzs w10, s2
+; CHECK-i32-SD-NEXT: frintx s2, s4
+; CHECK-i32-SD-NEXT: mov v0.s[1], w8
+; CHECK-i32-SD-NEXT: fcvtzs w11, s3
+; CHECK-i32-SD-NEXT: mov v1.s[1], w9
; CHECK-i32-SD-NEXT: frintx s3, s5
-; CHECK-i32-SD-NEXT: mov v0.s[2], w12
-; CHECK-i32-SD-NEXT: mov v1.s[2], w8
; CHECK-i32-SD-NEXT: fcvtzs w9, s2
+; CHECK-i32-SD-NEXT: mov v0.s[2], w10
+; CHECK-i32-SD-NEXT: mov v1.s[2], w11
; CHECK-i32-SD-NEXT: fcvtzs w8, s3
; CHECK-i32-SD-NEXT: mov v0.s[3], w9
; CHECK-i32-SD-NEXT: mov v1.s[3], w8
@@ -295,82 +283,78 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) nounwind {
; CHECK-i32-SD: // %bb.0:
; CHECK-i32-SD-NEXT: ext v2.16b, v0.16b, v0.16b, #8
; CHECK-i32-SD-NEXT: ext v3.16b, v1.16b, v1.16b, #8
-; CHECK-i32-SD-NEXT: mov h18, v0.h[1]
-; CHECK-i32-SD-NEXT: mov h19, v1.h[1]
-; CHECK-i32-SD-NEXT: fcvt s20, h0
-; CHECK-i32-SD-NEXT: mov h21, v0.h[2]
+; CHECK-i32-SD-NEXT: mov h4, v0.h[1]
+; CHECK-i32-SD-NEXT: mov h5, v1.h[1]
+; CHECK-i32-SD-NEXT: mov h16, v0.h[2]
+; CHECK-i32-SD-NEXT: fcvt s19, h0
+; CHECK-i32-SD-NEXT: mov h20, v1.h[2]
; CHECK-i32-SD-NEXT: mov h0, v0.h[3]
-; CHECK-i32-SD-NEXT: mov h4, v2.h[1]
-; CHECK-i32-SD-NEXT: mov h5, v2.h[2]
-; CHECK-i32-SD-NEXT: fcvt s6, h2
-; CHECK-i32-SD-NEXT: fcvt s7, h3
-; CHECK-i32-SD-NEXT: mov h16, v3.h[1]
-; CHECK-i32-SD-NEXT: mov h17, v3.h[2]
-; CHECK-i32-SD-NEXT: fcvt s18, h18
-; CHECK-i32-SD-NEXT: fcvt s19, h19
-; CHECK-i32-SD-NEXT: mov h2, v2.h[3]
+; CHECK-i32-SD-NEXT: mov h21, v1.h[3]
+; CHECK-i32-SD-NEXT: mov h6, v2.h[1]
+; CHECK-i32-SD-NEXT: mov h7, v3.h[1]
; CHECK-i32-SD-NEXT: fcvt s4, h4
; CHECK-i32-SD-NEXT: fcvt s5, h5
+; CHECK-i32-SD-NEXT: mov h17, v2.h[2]
+; CHECK-i32-SD-NEXT: mov h18, v3.h[2]
+; CHECK-i32-SD-NEXT: frintx s19, s19
+; CHECK-i32-SD-NEXT: fcvt s23, h0
+; CHECK-i32-SD-NEXT: fcvt s6, h6
+; CHECK-i32-SD-NEXT: fcvt s7, h7
+; CHECK-i32-SD-NEXT: frintx s4, s4
+; CHECK-i32-SD-NEXT: frintx s5, s5
+; CHECK-i32-SD-NEXT: fcvtzs s0, s19
; CHECK-i32-SD-NEXT: frintx s6, s6
; CHECK-i32-SD-NEXT: frintx s7, s7
-; CHECK-i32-SD-NEXT: fcvt s16, h16
-; CHECK-i32-SD-NEXT: fcvt s17, h17
+; CHECK-i32-SD-NEXT: fcvtzs w8, s4
+; CHECK-i32-SD-NEXT: fcvt s4, h2
+; CHECK-i32-SD-NEXT: fcvtzs w9, s5
+; CHECK-i32-SD-NEXT: fcvt s5, h3
+; CHECK-i32-SD-NEXT: mov h2, v2.h[3]
+; CHECK-i32-SD-NEXT: fcvtzs w10, s6
+; CHECK-i32-SD-NEXT: fcvt s6, h1
+; CHECK-i32-SD-NEXT: fcvtzs w11, s7
+; CHECK-i32-SD-NEXT: fcvt s7, h16
+; CHECK-i32-SD-NEXT: fcvt s16, h17
+; CHECK-i32-SD-NEXT: fcvt s17, h18
+; CHECK-i32-SD-NEXT: fcvt s18, h20
+; CHECK-i32-SD-NEXT: frintx s4, s4
+; CHECK-i32-SD-NEXT: frintx s5, s5
+; CHECK-i32-SD-NEXT: mov h20, v3.h[3]
+; CHECK-i32-SD-NEXT: fcvt s22, h2
+; CHECK-i32-SD-NEXT: mov v0.s[1], w8
+; CHECK-i32-SD-NEXT: frintx s6, s6
+; CHECK-i32-SD-NEXT: frintx s16, s16
+; CHECK-i32-SD-NEXT: frintx s17, s17
+; CHECK-i32-SD-NEXT: frintx s7, s7
; CHECK-i32-SD-NEXT: frintx s18, s18
-; CHECK-i32-SD-NEXT: fcvt s2, h2
+; CHECK-i32-SD-NEXT: fcvtzs s1, s4
+; CHECK-i32-SD-NEXT: fcvtzs s3, s5
+; CHECK-i32-SD-NEXT: fcvt s4, h20
+; CHECK-i32-SD-NEXT: fcvt s5, h21
+; CHECK-i32-SD-NEXT: fcvtzs s2, s6
+; CHECK-i32-SD-NEXT: frintx s6, s22
+; CHECK-i32-SD-NEXT: fcvtzs w12, s16
+; CHECK-i32-SD-NEXT: fcvtzs w13, s17
+; CHECK-i32-SD-NEXT: fcvtzs w14, s7
+; CHECK-i32-SD-NEXT: fcvtzs w15, s18
+; CHECK-i32-SD-NEXT: frintx s7, s23
+; CHECK-i32-SD-NEXT: mov v1.s[1], w10
+; CHECK-i32-SD-NEXT: mov v3.s[1], w11
; CHECK-i32-SD-NEXT: frintx s4, s4
; CHECK-i32-SD-NEXT: frintx s5, s5
+; CHECK-i32-SD-NEXT: mov v2.s[1], w9
; CHECK-i32-SD-NEXT: fcvtzs w8, s6
-; CHECK-i32-SD-NEXT: fcvt s6, h1
+; CHECK-i32-SD-NEXT: mov v0.s[2], w14
; CHECK-i32-SD-NEXT: fcvtzs w9, s7
-; CHECK-i32-SD-NEXT: mov h7, v1.h[2]
-; CHECK-i32-SD-NEXT: frintx s16, s16
-; CHECK-i32-SD-NEXT: fcvtzs w15, s18
+; CHECK-i32-SD-NEXT: mov v1.s[2], w12
+; CHECK-i32-SD-NEXT: mov v3.s[2], w13
; CHECK-i32-SD-NEXT: fcvtzs w10, s4
-; CHECK-i32-SD-NEXT: frintx s4, s17
; CHECK-i32-SD-NEXT: fcvtzs w11, s5
-; CHECK-i32-SD-NEXT: frintx s5, s20
-; CHECK-i32-SD-NEXT: fcvt s17, h21
-; CHECK-i32-SD-NEXT: frintx s6, s6
-; CHECK-i32-SD-NEXT: fcvtzs w12, s16
-; CHECK-i32-SD-NEXT: frintx s16, s19
-; CHECK-i32-SD-NEXT: fcvt s7, h7
-; CHECK-i32-SD-NEXT: mov h19, v1.h[3]
-; CHECK-i32-SD-NEXT: fmov s1, w8
-; CHECK-i32-SD-NEXT: fcvtzs w13, s4
-; CHECK-i32-SD-NEXT: mov h4, v3.h[3]
-; CHECK-i32-SD-NEXT: fmov s3, w9
-; CHECK-i32-SD-NEXT: fcvtzs w14, s5
-; CHECK-i32-SD-NEXT: frintx s5, s17
-; CHECK-i32-SD-NEXT: fcvtzs w16, s6
-; CHECK-i32-SD-NEXT: fcvt s17, h0
-; CHECK-i32-SD-NEXT: fcvtzs w8, s16
-; CHECK-i32-SD-NEXT: frintx s6, s7
-; CHECK-i32-SD-NEXT: fcvt s7, h19
-; CHECK-i32-SD-NEXT: mov v1.s[1], w10
-; CHECK-i32-SD-NEXT: mov v3.s[1], w12
-; CHECK-i32-SD-NEXT: fcvt s4, h4
-; CHECK-i32-SD-NEXT: fcvtzs w9, s5
-; CHECK-i32-SD-NEXT: fmov s0, w14
-; CHECK-i32-SD-NEXT: frintx s5, s2
-; CHECK-i32-SD-NEXT: fmov s2, w16
-; CHECK-i32-SD-NEXT: frintx s16, s17
-; CHECK-i32-SD-NEXT: fcvtzs w10, s6
-; CHECK-i32-SD-NEXT: frintx s6, s7
-; CHECK-i32-SD-NEXT: mov v1.s[2], w11
-; CHECK-i32-SD-NEXT: mov v3.s[2], w13
-; CHECK-i32-SD-NEXT: mov v0.s[1], w15
-; CHECK-i32-SD-NEXT: frintx s4, s4
-; CHECK-i32-SD-NEXT: mov v2.s[1], w8
-; CHECK-i32-SD-NEXT: fcvtzs w8, s5
-; CHECK-i32-SD-NEXT: fcvtzs w12, s16
-; CHECK-i32-SD-NEXT: mov v0.s[2], w9
-; CHECK-i32-SD-NEXT: fcvtzs w9, s4
-; CHECK-i32-SD-NEXT: mov v2.s[2], w10
-; CHECK-i32-SD-NEXT: fcvtzs w10, s6
+; CHECK-i32-SD-NEXT: mov v2.s[2], w15
+; CHECK-i32-SD-NEXT: mov v0.s[3], w9
; CHECK-i32-SD-NEXT: mov v1.s[3], w8
-; CHECK-i32-SD-NEXT: mov v0.s[3], w12
-; CHECK-i32-SD-NEXT: mov v3.s[3], w9
-; CHECK-i32-SD-NEXT: mov v2.s[3], w10
+; CHECK-i32-SD-NEXT: mov v3.s[3], w10
+; CHECK-i32-SD-NEXT: mov v2.s[3], w11
; CHECK-i32-SD-NEXT: ret
;
; CHECK-i64-SD-LABEL: lrint_v16f16:
@@ -522,164 +506,156 @@ declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>)
define <32 x iXLen> @lrint_v32f16(<32 x half> %x) nounwind {
; CHECK-i32-SD-LABEL: lrint_v32f16:
; CHECK-i32-SD: // %bb.0:
-; CHECK-i32-SD-NEXT: ext v5.16b, v0.16b, v0.16b, #8
-; CHECK-i32-SD-NEXT: ext v4.16b, v1.16b, v1.16b, #8
-; CHECK-i32-SD-NEXT: ext v17.16b, v2.16b, v2.16b, #8
-; CHECK-i32-SD-NEXT: mov h6, v5.h[1]
-; CHECK-i32-SD-NEXT: fcvt s7, h5
-; CHECK-i32-SD-NEXT: mov h16, v5.h[2]
-; CHECK-i32-SD-NEXT: mov h5, v5.h[3]
-; CHECK-i32-SD-NEXT: mov h18, v4.h[1]
-; CHECK-i32-SD-NEXT: mov h20, v4.h[3]
-; CHECK-i32-SD-NEXT: mov h19, v4.h[2]
-; CHECK-i32-SD-NEXT: fcvt s21, h4
-; CHECK-i32-SD-NEXT: mov h23, v17.h[1]
-; CHECK-i32-SD-NEXT: ext v4.16b, v3.16b, v3.16b, #8
-; CHECK-i32-SD-NEXT: fcvt s22, h17
-; CHECK-i32-SD-NEXT: fcvt s6, h6
-; CHECK-i32-SD-NEXT: frintx s7, s7
-; CHECK-i32-SD-NEXT: fcvt s16, h16
+; CHECK-i32-SD-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-i32-SD-NEXT: ext v4.16b, v0.16b, v0.16b, #8
+; CHECK-i32-SD-NEXT: ext v5.16b, v1.16b, v1.16b, #8
+; CHECK-i32-SD-NEXT: ext v6.16b, v2.16b, v2.16b, #8
+; CHECK-i32-SD-NEXT: mov h27, v3.h[2]
+; CHECK-i32-SD-NEXT: mov h16, v4.h[2]
+; CHECK-i32-SD-NEXT: mov h17, v4.h[3]
+; CHECK-i32-SD-NEXT: mov h18, v5.h[1]
+; CHECK-i32-SD-NEXT: mov h7, v4.h[1]
+; CHECK-i32-SD-NEXT: mov h19, v5.h[2]
+; CHECK-i32-SD-NEXT: mov h20, v5.h[3]
+; CHECK-i32-SD-NEXT: mov h21, v6.h[1]
+; CHECK-i32-SD-NEXT: mov h22, v6.h[2]
+; CHECK-i32-SD-NEXT: fcvt s4, h4
; CHECK-i32-SD-NEXT: fcvt s5, h5
+; CHECK-i32-SD-NEXT: fcvt s16, h16
+; CHECK-i32-SD-NEXT: fcvt s17, h17
; CHECK-i32-SD-NEXT: fcvt s18, h18
-; CHECK-i32-SD-NEXT: fcvt s20, h20
+; CHECK-i32-SD-NEXT: fcvt s23, h7
; CHECK-i32-SD-NEXT: fcvt s19, h19
-; CHECK-i32-SD-NEXT: frintx s22, s22
-; CHECK-i32-SD-NEXT: frintx s6, s6
-; CHECK-i32-SD-NEXT: fcvtzs w12, s7
-; CHECK-i32-SD-NEXT: frintx s7, s16
+; CHECK-i32-SD-NEXT: ext v7.16b, v3.16b, v3.16b, #8
+; CHECK-i32-SD-NEXT: fcvt s20, h20
+; CHECK-i32-SD-NEXT: fcvt s21, h21
+; CHECK-i32-SD-NEXT: fcvt s22, h22
+; CHECK-i32-SD-NEXT: frintx s4, s4
; CHECK-i32-SD-NEXT: frintx s5, s5
-; CHECK-i32-SD-NEXT: frintx s16, s21
-; CHECK-i32-SD-NEXT: fcvt s21, h23
+; CHECK-i32-SD-NEXT: frintx s24, s16
+; CHECK-i32-SD-NEXT: frintx s17, s17
; CHECK-i32-SD-NEXT: frintx s18, s18
-; CHECK-i32-SD-NEXT: frintx s20, s20
+; CHECK-i32-SD-NEXT: mov h16, v6.h[3]
; CHECK-i32-SD-NEXT: frintx s19, s19
-; CHECK-i32-SD-NEXT: fcvtzs w15, s22
-; CHECK-i32-SD-NEXT: mov h22, v1.h[2]
-; CHECK-i32-SD-NEXT: fcvtzs w17, s6
-; CHECK-i32-SD-NEXT: mov h6, v17.h[2]
-; CHECK-i32-SD-NEXT: mov h17, v17.h[3]
-; CHECK-i32-SD-NEXT: fcvtzs w9, s7
-; CHECK-i32-SD-NEXT: mov h7, v4.h[2]
-; CHECK-i32-SD-NEXT: fcvtzs w8, s5
-; CHECK-i32-SD-NEXT: mov h5, v4.h[1]
-; CHECK-i32-SD-NEXT: fcvtzs w13, s16
-; CHECK-i32-SD-NEXT: frintx s16, s21
-; CHECK-i32-SD-NEXT: fcvtzs w14, s18
-; CHECK-i32-SD-NEXT: fcvtzs w10, s20
-; CHECK-i32-SD-NEXT: fcvt s18, h4
+; CHECK-i32-SD-NEXT: mov h25, v7.h[1]
+; CHECK-i32-SD-NEXT: mov h26, v7.h[2]
+; CHECK-i32-SD-NEXT: frintx s20, s20
+; CHECK-i32-SD-NEXT: frintx s21, s21
+; CHECK-i32-SD-NEXT: frintx s22, s22
+; CHECK-i32-SD-NEXT: frintx s23, s23
; CHECK-i32-SD-NEXT: fcvt s6, h6
+; CHECK-i32-SD-NEXT: fcvtzs w8, s17
+; CHECK-i32-SD-NEXT: fcvtzs w12, s18
+; CHECK-i32-SD-NEXT: mov h17, v0.h[1]
+; CHECK-i32-SD-NEXT: mov h18, v0.h[2]
+; CHECK-i32-SD-NEXT: fcvt s16, h16
+; CHECK-i32-SD-NEXT: fcvtzs w9, s24
+; CHECK-i32-SD-NEXT: fcvtzs w10, s19
+; CHECK-i32-SD-NEXT: fcvtzs w13, s20
+; CHECK-i32-SD-NEXT: mov h19, v0.h[3]
+; CHECK-i32-SD-NEXT: mov h20, v1.h[1]
+; CHECK-i32-SD-NEXT: fcvtzs w15, s21
+; CHECK-i32-SD-NEXT: fcvtzs w14, s22
+; CHECK-i32-SD-NEXT: fcvt s21, h25
+; CHECK-i32-SD-NEXT: fcvt s22, h26
; CHECK-i32-SD-NEXT: fcvt s17, h17
-; CHECK-i32-SD-NEXT: mov h20, v0.h[2]
-; CHECK-i32-SD-NEXT: fcvt s7, h7
-; CHECK-i32-SD-NEXT: fcvtzs w11, s19
-; CHECK-i32-SD-NEXT: mov h19, v0.h[1]
-; CHECK-i32-SD-NEXT: fcvt s5, h5
-; CHECK-i32-SD-NEXT: fcvtzs w0, s16
-; CHECK-i32-SD-NEXT: mov h21, v1.h[1]
-; CHECK-i32-SD-NEXT: frintx s18, s18
-; CHECK-i32-SD-NEXT: mov h4, v4.h[3]
-; CHECK-i32-SD-NEXT: frintx s6, s6
-; CHECK-i32-SD-NEXT: frintx s16, s17
-; CHECK-i32-SD-NEXT: mov h17, v0.h[3]
-; CHECK-i32-SD-NEXT: fcvt s0, h0
+; CHECK-i32-SD-NEXT: fcvt s18, h18
+; CHECK-i32-SD-NEXT: mov h24, v2.h[1]
+; CHECK-i32-SD-NEXT: mov h25, v2.h[2]
+; CHECK-i32-SD-NEXT: frintx s16, s16
+; CHECK-i32-SD-NEXT: mov h26, v3.h[1]
+; CHECK-i32-SD-NEXT: fcvtzs w11, s23
+; CHECK-i32-SD-NEXT: mov h23, v1.h[2]
; CHECK-i32-SD-NEXT: fcvt s19, h19
-; CHECK-i32-SD-NEXT: frintx s5, s5
-; CHECK-i32-SD-NEXT: fcvtzs w2, s18
-; CHECK-i32-SD-NEXT: fcvt s18, h21
-; CHECK-i32-SD-NEXT: fcvt s21, h2
-; CHECK-i32-SD-NEXT: fcvtzs w18, s6
-; CHECK-i32-SD-NEXT: frintx s6, s7
-; CHECK-i32-SD-NEXT: fcvt s7, h20
-; CHECK-i32-SD-NEXT: fcvtzs w16, s16
-; CHECK-i32-SD-NEXT: fcvt s16, h17
-; CHECK-i32-SD-NEXT: fcvt s17, h1
-; CHECK-i32-SD-NEXT: frintx s0, s0
-; CHECK-i32-SD-NEXT: fcvtzs w3, s5
-; CHECK-i32-SD-NEXT: frintx s5, s19
-; CHECK-i32-SD-NEXT: fcvt s19, h22
-; CHECK-i32-SD-NEXT: mov h1, v1.h[3]
-; CHECK-i32-SD-NEXT: fcvtzs w1, s6
-; CHECK-i32-SD-NEXT: frintx s6, s7
-; CHECK-i32-SD-NEXT: mov h7, v2.h[1]
+; CHECK-i32-SD-NEXT: fcvt s20, h20
; CHECK-i32-SD-NEXT: frintx s17, s17
-; CHECK-i32-SD-NEXT: frintx s20, s16
-; CHECK-i32-SD-NEXT: fmov s16, w12
-; CHECK-i32-SD-NEXT: fcvtzs w4, s0
-; CHECK-i32-SD-NEXT: frintx s0, s18
-; CHECK-i32-SD-NEXT: fcvtzs w5, s5
-; CHECK-i32-SD-NEXT: frintx s5, s19
-; CHECK-i32-SD-NEXT: frintx s18, s21
-; CHECK-i32-SD-NEXT: fcvt s19, h3
-; CHECK-i32-SD-NEXT: fcvtzs w12, s6
-; CHECK-i32-SD-NEXT: fcvt s6, h7
-; CHECK-i32-SD-NEXT: mov h7, v3.h[1]
-; CHECK-i32-SD-NEXT: fcvtzs w6, s17
-; CHECK-i32-SD-NEXT: fmov s17, w13
-; CHECK-i32-SD-NEXT: mov v16.s[1], w17
-; CHECK-i32-SD-NEXT: fcvtzs w17, s20
-; CHECK-i32-SD-NEXT: fcvtzs w7, s0
-; CHECK-i32-SD-NEXT: mov h0, v2.h[2]
-; CHECK-i32-SD-NEXT: mov h20, v3.h[2]
-; CHECK-i32-SD-NEXT: fcvtzs w13, s5
-; CHECK-i32-SD-NEXT: fmov s5, w15
-; CHECK-i32-SD-NEXT: frintx s6, s6
-; CHECK-i32-SD-NEXT: fcvt s7, h7
-; CHECK-i32-SD-NEXT: mov v17.s[1], w14
-; CHECK-i32-SD-NEXT: fcvtzs w14, s18
-; CHECK-i32-SD-NEXT: frintx s18, s19
-; CHECK-i32-SD-NEXT: mov h2, v2.h[3]
+; CHECK-i32-SD-NEXT: frintx s21, s21
+; CHECK-i32-SD-NEXT: frintx s22, s22
+; CHECK-i32-SD-NEXT: frintx s18, s18
+; CHECK-i32-SD-NEXT: fcvt s24, h24
+; CHECK-i32-SD-NEXT: fcvt s25, h25
+; CHECK-i32-SD-NEXT: fcvtzs w16, s16
+; CHECK-i32-SD-NEXT: fcvt s16, h26
+; CHECK-i32-SD-NEXT: fcvt s26, h27
+; CHECK-i32-SD-NEXT: fcvt s23, h23
+; CHECK-i32-SD-NEXT: frintx s19, s19
+; CHECK-i32-SD-NEXT: frintx s20, s20
+; CHECK-i32-SD-NEXT: fcvtzs w2, s17
+; CHECK-i32-SD-NEXT: fcvtzs w1, s21
; CHECK-i32-SD-NEXT: fcvt s0, h0
-; CHECK-i32-SD-NEXT: mov h3, v3.h[3]
-; CHECK-i32-SD-NEXT: mov v5.s[1], w0
-; CHECK-i32-SD-NEXT: fcvt s19, h20
-; CHECK-i32-SD-NEXT: fcvt s1, h1
-; CHECK-i32-SD-NEXT: mov v16.s[2], w9
-; CHECK-i32-SD-NEXT: fcvtzs w15, s6
-; CHECK-i32-SD-NEXT: frintx s6, s7
-; CHECK-i32-SD-NEXT: fmov s7, w2
; CHECK-i32-SD-NEXT: fcvtzs w0, s18
-; CHECK-i32-SD-NEXT: fcvt s20, h2
-; CHECK-i32-SD-NEXT: fcvt s18, h4
-; CHECK-i32-SD-NEXT: frintx s21, s0
-; CHECK-i32-SD-NEXT: fcvt s3, h3
-; CHECK-i32-SD-NEXT: fmov s0, w4
-; CHECK-i32-SD-NEXT: frintx s19, s19
-; CHECK-i32-SD-NEXT: fmov s2, w6
-; CHECK-i32-SD-NEXT: fmov s4, w14
-; CHECK-i32-SD-NEXT: fcvtzs w2, s6
-; CHECK-i32-SD-NEXT: mov v7.s[1], w3
-; CHECK-i32-SD-NEXT: frintx s1, s1
-; CHECK-i32-SD-NEXT: fmov s6, w0
-; CHECK-i32-SD-NEXT: mov v0.s[1], w5
+; CHECK-i32-SD-NEXT: frintx s17, s24
+; CHECK-i32-SD-NEXT: frintx s18, s25
+; CHECK-i32-SD-NEXT: frintx s16, s16
+; CHECK-i32-SD-NEXT: fcvtzs w18, s22
+; CHECK-i32-SD-NEXT: frintx s6, s6
+; CHECK-i32-SD-NEXT: frintx s21, s23
+; CHECK-i32-SD-NEXT: fcvtzs w17, s19
+; CHECK-i32-SD-NEXT: fcvtzs w3, s20
+; CHECK-i32-SD-NEXT: frintx s19, s26
+; CHECK-i32-SD-NEXT: fcvt s20, h7
+; CHECK-i32-SD-NEXT: frintx s0, s0
+; CHECK-i32-SD-NEXT: fcvtzs w5, s17
+; CHECK-i32-SD-NEXT: fcvt s17, h1
+; CHECK-i32-SD-NEXT: fcvtzs w6, s18
+; CHECK-i32-SD-NEXT: fcvt s18, h2
+; CHECK-i32-SD-NEXT: fcvtzs w7, s16
+; CHECK-i32-SD-NEXT: fcvt s16, h3
+; CHECK-i32-SD-NEXT: fcvtzs w4, s21
+; CHECK-i32-SD-NEXT: mov h2, v2.h[3]
+; CHECK-i32-SD-NEXT: mov h21, v7.h[3]
+; CHECK-i32-SD-NEXT: fcvtzs w19, s19
+; CHECK-i32-SD-NEXT: mov h19, v1.h[3]
; CHECK-i32-SD-NEXT: frintx s20, s20
-; CHECK-i32-SD-NEXT: mov v2.s[1], w7
-; CHECK-i32-SD-NEXT: fcvtzs w3, s21
-; CHECK-i32-SD-NEXT: mov v4.s[1], w15
-; CHECK-i32-SD-NEXT: fcvtzs w14, s19
+; CHECK-i32-SD-NEXT: frintx s17, s17
+; CHECK-i32-SD-NEXT: mov h22, v3.h[3]
+; CHECK-i32-SD-NEXT: fcvtzs s1, s4
; CHECK-i32-SD-NEXT: frintx s18, s18
-; CHECK-i32-SD-NEXT: frintx s3, s3
-; CHECK-i32-SD-NEXT: mov v6.s[1], w2
-; CHECK-i32-SD-NEXT: mov v17.s[2], w11
-; CHECK-i32-SD-NEXT: fcvtzs w15, s1
-; CHECK-i32-SD-NEXT: fcvtzs w0, s20
-; CHECK-i32-SD-NEXT: mov v5.s[2], w18
-; CHECK-i32-SD-NEXT: mov v0.s[2], w12
-; CHECK-i32-SD-NEXT: mov v7.s[2], w1
-; CHECK-i32-SD-NEXT: mov v2.s[2], w13
-; CHECK-i32-SD-NEXT: mov v4.s[2], w3
-; CHECK-i32-SD-NEXT: fcvtzs w9, s18
-; CHECK-i32-SD-NEXT: fcvtzs w11, s3
-; CHECK-i32-SD-NEXT: mov v16.s[3], w8
-; CHECK-i32-SD-NEXT: mov v6.s[2], w14
-; CHECK-i32-SD-NEXT: mov v17.s[3], w10
+; CHECK-i32-SD-NEXT: frintx s16, s16
+; CHECK-i32-SD-NEXT: fcvtzs s3, s5
+; CHECK-i32-SD-NEXT: fcvt s23, h2
+; CHECK-i32-SD-NEXT: fcvtzs s5, s6
+; CHECK-i32-SD-NEXT: fcvtzs s0, s0
+; CHECK-i32-SD-NEXT: fcvt s19, h19
+; CHECK-i32-SD-NEXT: fcvtzs s7, s20
+; CHECK-i32-SD-NEXT: fcvtzs s2, s17
+; CHECK-i32-SD-NEXT: fcvt s17, h21
+; CHECK-i32-SD-NEXT: mov v1.s[1], w11
+; CHECK-i32-SD-NEXT: fcvtzs s4, s18
+; CHECK-i32-SD-NEXT: fcvtzs s6, s16
+; CHECK-i32-SD-NEXT: fcvt s16, h22
+; CHECK-i32-SD-NEXT: mov v3.s[1], w12
+; CHECK-i32-SD-NEXT: mov v5.s[1], w15
+; CHECK-i32-SD-NEXT: mov v0.s[1], w2
+; CHECK-i32-SD-NEXT: frintx s18, s19
+; CHECK-i32-SD-NEXT: frintx s19, s23
+; CHECK-i32-SD-NEXT: mov v7.s[1], w1
+; CHECK-i32-SD-NEXT: mov v2.s[1], w3
+; CHECK-i32-SD-NEXT: frintx s17, s17
+; CHECK-i32-SD-NEXT: mov v1.s[2], w9
+; CHECK-i32-SD-NEXT: mov v4.s[1], w5
+; CHECK-i32-SD-NEXT: mov v6.s[1], w7
+; CHECK-i32-SD-NEXT: frintx s16, s16
+; CHECK-i32-SD-NEXT: mov v3.s[2], w10
+; CHECK-i32-SD-NEXT: mov v5.s[2], w14
+; CHECK-i32-SD-NEXT: mov v0.s[2], w0
+; CHECK-i32-SD-NEXT: fcvtzs w11, s18
+; CHECK-i32-SD-NEXT: fcvtzs w12, s19
+; CHECK-i32-SD-NEXT: mov v7.s[2], w18
+; CHECK-i32-SD-NEXT: mov v2.s[2], w4
+; CHECK-i32-SD-NEXT: fcvtzs w9, s17
+; CHECK-i32-SD-NEXT: mov v1.s[3], w8
+; CHECK-i32-SD-NEXT: mov v4.s[2], w6
+; CHECK-i32-SD-NEXT: mov v6.s[2], w19
+; CHECK-i32-SD-NEXT: fcvtzs w10, s16
; CHECK-i32-SD-NEXT: mov v0.s[3], w17
+; CHECK-i32-SD-NEXT: mov v3.s[3], w13
; CHECK-i32-SD-NEXT: mov v5.s[3], w16
-; CHECK-i32-SD-NEXT: mov v2.s[3], w15
-; CHECK-i32-SD-NEXT: mov v4.s[3], w0
+; CHECK-i32-SD-NEXT: mov v2.s[3], w11
; CHECK-i32-SD-NEXT: mov v7.s[3], w9
-; CHECK-i32-SD-NEXT: mov v1.16b, v16.16b
-; CHECK-i32-SD-NEXT: mov v6.s[3], w11
-; CHECK-i32-SD-NEXT: mov v3.16b, v17.16b
+; CHECK-i32-SD-NEXT: mov v4.s[3], w12
+; CHECK-i32-SD-NEXT: mov v6.s[3], w10
+; CHECK-i32-SD-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload
; CHECK-i32-SD-NEXT: ret
;
; CHECK-i64-SD-LABEL: lrint_v32f16:
@@ -1326,18 +1302,11 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) nounwind {
; CHECK-i32-NEXT: fmov s0, w8
; CHECK-i32-NEXT: ret
;
-; CHECK-i64-SD-LABEL: lrint_v1f64:
-; CHECK-i64-SD: // %bb.0:
-; CHECK-i64-SD-NEXT: frintx d0, d0
-; CHECK-i64-SD-NEXT: fcvtzs x8, d0
-; CHECK-i64-SD-NEXT: fmov d0, x8
-; CHECK-i64-SD-NEXT: ret
-;
-; CHECK-i64-GI-LABEL: lrint_v1f64:
-; CHECK-i64-GI: // %bb.0:
-; CHECK-i64-GI-NEXT: frintx d0, d0
-; CHECK-i64-GI-NEXT: fcvtzs d0, d0
-; CHECK-i64-GI-NEXT: ret
+; CHECK-i64-LABEL: lrint_v1f64:
+; CHECK-i64: // %bb.0:
+; CHECK-i64-NEXT: frintx d0, d0
+; CHECK-i64-NEXT: fcvtzs d0, d0
+; CHECK-i64-NEXT: ret
%a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x)
ret <1 x iXLen> %a
}
diff --git a/llvm/test/CodeGen/AArch64/zext.ll b/llvm/test/CodeGen/AArch64/zext.ll
index 7078d9b2586a8..292b7b28903ee 100644
--- a/llvm/test/CodeGen/AArch64/zext.ll
+++ b/llvm/test/CodeGen/AArch64/zext.ll
@@ -1127,41 +1127,41 @@ entry:
define <16 x i64> @zext_v16i10_v16i64(<16 x i10> %a) {
; CHECK-SD-LABEL: zext_v16i10_v16i64:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov s0, w2
-; CHECK-SD-NEXT: fmov s1, w0
+; CHECK-SD-NEXT: fmov s0, w6
+; CHECK-SD-NEXT: fmov s1, w4
; CHECK-SD-NEXT: ldr s2, [sp]
-; CHECK-SD-NEXT: fmov s3, w4
-; CHECK-SD-NEXT: fmov s4, w6
-; CHECK-SD-NEXT: add x9, sp, #8
+; CHECK-SD-NEXT: fmov s3, w2
+; CHECK-SD-NEXT: fmov s4, w0
; CHECK-SD-NEXT: ldr s5, [sp, #16]
; CHECK-SD-NEXT: ldr s6, [sp, #32]
; CHECK-SD-NEXT: ldr s7, [sp, #48]
-; CHECK-SD-NEXT: mov v1.s[1], w1
-; CHECK-SD-NEXT: mov v0.s[1], w3
-; CHECK-SD-NEXT: ld1 { v2.s }[1], [x9]
-; CHECK-SD-NEXT: mov v3.s[1], w5
-; CHECK-SD-NEXT: mov v4.s[1], w7
+; CHECK-SD-NEXT: add x8, sp, #8
+; CHECK-SD-NEXT: mov v1.s[1], w5
+; CHECK-SD-NEXT: mov v0.s[1], w7
; CHECK-SD-NEXT: add x9, sp, #24
+; CHECK-SD-NEXT: mov v4.s[1], w1
+; CHECK-SD-NEXT: mov v3.s[1], w3
; CHECK-SD-NEXT: add x10, sp, #40
; CHECK-SD-NEXT: add x11, sp, #56
+; CHECK-SD-NEXT: ld1 { v2.s }[1], [x8]
; CHECK-SD-NEXT: ld1 { v5.s }[1], [x9]
; CHECK-SD-NEXT: ld1 { v6.s }[1], [x10]
; CHECK-SD-NEXT: ld1 { v7.s }[1], [x11]
; CHECK-SD-NEXT: mov w8, #1023 // =0x3ff
-; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0
; CHECK-SD-NEXT: dup v16.2d, x8
-; CHECK-SD-NEXT: ushll v17.2d, v0.2s, #0
-; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0
+; CHECK-SD-NEXT: ushll v17.2d, v1.2s, #0
+; CHECK-SD-NEXT: ushll v18.2d, v0.2s, #0
; CHECK-SD-NEXT: ushll v4.2d, v4.2s, #0
-; CHECK-SD-NEXT: ushll v18.2d, v2.2s, #0
+; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
; CHECK-SD-NEXT: ushll v5.2d, v5.2s, #0
; CHECK-SD-NEXT: ushll v6.2d, v6.2s, #0
; CHECK-SD-NEXT: ushll v7.2d, v7.2s, #0
-; CHECK-SD-NEXT: and v0.16b, v1.16b, v16.16b
-; CHECK-SD-NEXT: and v1.16b, v17.16b, v16.16b
-; CHECK-SD-NEXT: and v2.16b, v3.16b, v16.16b
-; CHECK-SD-NEXT: and v3.16b, v4.16b, v16.16b
-; CHECK-SD-NEXT: and v4.16b, v18.16b, v16.16b
+; CHECK-SD-NEXT: and v0.16b, v4.16b, v16.16b
+; CHECK-SD-NEXT: and v1.16b, v3.16b, v16.16b
+; CHECK-SD-NEXT: and v4.16b, v2.16b, v16.16b
+; CHECK-SD-NEXT: and v2.16b, v17.16b, v16.16b
+; CHECK-SD-NEXT: and v3.16b, v18.16b, v16.16b
; CHECK-SD-NEXT: and v5.16b, v5.16b, v16.16b
; CHECK-SD-NEXT: and v6.16b, v6.16b, v16.16b
; CHECK-SD-NEXT: and v7.16b, v7.16b, v16.16b
>From a8aaad464f36e1c3a33bc2f45e594de31637cd99 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 21 Jan 2026 15:42:30 +0000
Subject: [PATCH 6/8] clean-up trigger conditions
---
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index b1377aeaaa69c..be79aab5357dd 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -7808,9 +7808,8 @@ void AArch64DAGToDAGISel::PreprocessISelDAG() {
switch (N.getOpcode()) {
case ISD::SCALAR_TO_VECTOR: {
EVT VT = N.getValueType(0);
- if (!VT.isVector() || VT.isScalableVector() || !VT.isInteger())
- break;
- if (VT.getVectorElementType() != N.getOperand(0).getValueType())
+ if (VT.isScalableVector() || !VT.isInteger() ||
+ VT.getVectorElementType() != N.getOperand(0).getValueType())
break;
Result = addBitcastHints(*CurDAG, N);
>From 1d19c22fcf7e9075c61c07b7d9511ab6e1c7a799 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Thu, 22 Jan 2026 11:51:25 +0000
Subject: [PATCH 7/8] Adjust check for adding bitcast
---
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index be79aab5357dd..c32a59e8af447 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -7807,12 +7807,10 @@ void AArch64DAGToDAGISel::PreprocessISelDAG() {
SDValue Result;
switch (N.getOpcode()) {
case ISD::SCALAR_TO_VECTOR: {
- EVT VT = N.getValueType(0);
- if (VT.isScalableVector() || !VT.isInteger() ||
- VT.getVectorElementType() != N.getOperand(0).getValueType())
- break;
-
- Result = addBitcastHints(*CurDAG, N);
+ EVT ScalarTy = N.getValueType(0).getVectorElementType();
+ if ((ScalarTy == MVT::i32 || ScalarTy == MVT::i64) && ScalarTy == N.getOperand(0).getValueType())
+ Result = addBitcastHints(*CurDAG, N);
+
break;
}
default:
>From 55a16ead7577205167b166989fbb9b2e92b2bdd2 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Thu, 22 Jan 2026 12:40:30 +0000
Subject: [PATCH 8/8] Fix interface
---
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index b7d807557f29a..fb0be3e812a96 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -537,10 +537,11 @@ INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
/// to help instruction selector determine which operands are in Neon registers.
static SDValue addBitcastHints(SelectionDAG &DAG, SDNode &N) {
SDLoc DL(&N);
- auto getFloatVT = [](EVT VT) {
+ auto getFloatVT = [&](EVT VT) {
EVT ScalarVT = VT.getScalarType();
assert((ScalarVT == MVT::i32 || ScalarVT == MVT::i64) && "Unexpected VT");
- return VT.changeElementType(ScalarVT == MVT::i32 ? MVT::f32 : MVT::f64);
+ return VT.changeElementType(*(DAG.getContext()),
+ ScalarVT == MVT::i32 ? MVT::f32 : MVT::f64);
};
auto bitcastToFloat = [&](SDValue Val) {
return DAG.getBitcast(getFloatVT(Val.getValueType()), Val);
@@ -7828,7 +7829,7 @@ void AArch64DAGToDAGISel::PreprocessISelDAG() {
EVT ScalarTy = N.getValueType(0).getVectorElementType();
if ((ScalarTy == MVT::i32 || ScalarTy == MVT::i64) && ScalarTy == N.getOperand(0).getValueType())
Result = addBitcastHints(*CurDAG, N);
-
+
break;
}
default:
More information about the llvm-commits
mailing list