[llvm] [AArch64] Don't try to vectorize fixed point to fp narrowing conversion (PR #130665)
Pranav Kant via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 24 13:50:40 PDT 2025
https://github.com/pranavk updated https://github.com/llvm/llvm-project/pull/130665
>From 11f13142fbe99e175884687304bfbbeaf8495310 Mon Sep 17 00:00:00 2001
From: Pranav Kant <prka at google.com>
Date: Mon, 24 Mar 2025 17:27:32 +0000
Subject: [PATCH 1/2] init
---
.../Target/AArch64/AArch64ISelLowering.cpp | 23 +++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d0f51b73a4a44..1c8e3afdfd718 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5106,6 +5106,29 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
uint64_t VTSize = VT.getFixedSizeInBits();
uint64_t InVTSize = InVT.getFixedSizeInBits();
if (VTSize < InVTSize) {
+ // AArch64 doesn't have a direct vector instruction to convert
+ // fixed point to floating point AND narrow it at the same time.
+ // Additional rounding when the target is f32/f64 causes double
+ // rounding issues. Conversion to f16 is fine due to narrow width.
+ bool IsTargetf32 = VT.getVectorElementType() == MVT::f32;
+ bool IsTargetf16 = false;
+ if (Op.hasOneUse() &&
+ Op->user_begin()->getOpcode() == ISD::CONCAT_VECTORS) {
+ // Some vector types are split in half during legalization, followed by
+ // concatenation, followed by rounding to the original vector type. If we
+ // end up resolving to f16 type, we shouldn't worry about rounding errors.
+ SDNode *U = *Op->user_begin();
+ if (U->hasOneUse() && U->user_begin()->getOpcode() == ISD::FP_ROUND) {
+ EVT TmpVT = U->user_begin()->getValueType(0);
+ if (TmpVT.getScalarType() == MVT::f16)
+ IsTargetf16 = true;
+ }
+ }
+
+ if (IsTargetf32 && !IsTargetf16) {
+ return !IsStrict ? DAG.UnrollVectorOp(Op.getNode()) : SDValue();
+ }
+
MVT CastVT =
MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
InVT.getVectorNumElements());
>From 04aafe43bbaeac48040d02dd24271aa3256133bd Mon Sep 17 00:00:00 2001
From: Pranav Kant <prka at google.com>
Date: Mon, 24 Mar 2025 17:51:51 +0000
Subject: [PATCH 2/2] modify tests
---
.../aarch64-neon-vector-insert-uaddlv.ll | 32 +-
.../CodeGen/AArch64/arm64-convert-v4f64.ll | 33 +-
.../CodeGen/AArch64/bf16-v4-instructions.ll | 100 +-
.../CodeGen/AArch64/bf16-v8-instructions.ll | 190 ++--
.../test/CodeGen/AArch64/complex-int-to-fp.ll | 25 +-
.../fold-int-pow2-with-fmul-or-fdiv.ll | 11 +-
.../CodeGen/AArch64/fp-intrinsics-vector.ll | 51 +-
llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll | 26 +-
llvm/test/CodeGen/AArch64/itofp-bf16.ll | 970 +++++++++++-------
llvm/test/CodeGen/AArch64/itofp.ll | 622 ++++++++---
.../AArch64/sve-fixed-length-int-to-fp.ll | 30 +-
llvm/test/CodeGen/AArch64/vector-fcvt.ll | 62 +-
12 files changed, 1437 insertions(+), 715 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
index b357a24f892ff..91eda8d552397 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
@@ -148,9 +148,9 @@ define void @insert_vec_v6i64_uaddlv_from_v4i32(ptr %0) {
; CHECK-NEXT: movi.2d v0, #0000000000000000
; CHECK-NEXT: str xzr, [x0, #16]
; CHECK-NEXT: uaddlv.4s d1, v0
-; CHECK-NEXT: mov.d v0[0], v1[0]
-; CHECK-NEXT: ucvtf.2d v0, v0
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: ucvtf s1, x8
+; CHECK-NEXT: mov.s v0[0], v1[0]
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
@@ -166,10 +166,11 @@ define void @insert_vec_v2i64_uaddlv_from_v4i32(ptr %0) {
; CHECK-LABEL: insert_vec_v2i64_uaddlv_from_v4i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.2d v0, #0000000000000000
-; CHECK-NEXT: uaddlv.4s d1, v0
-; CHECK-NEXT: mov.d v0[0], v1[0]
-; CHECK-NEXT: ucvtf.2d v0, v0
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: uaddlv.4s d0, v0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: movi d0, #0000000000000000
+; CHECK-NEXT: ucvtf s1, x8
+; CHECK-NEXT: mov.s v0[0], v1[0]
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
@@ -187,9 +188,9 @@ define void @insert_vec_v5i64_uaddlv_from_v4i32(ptr %0) {
; CHECK-NEXT: movi.2d v0, #0000000000000000
; CHECK-NEXT: str wzr, [x0, #16]
; CHECK-NEXT: uaddlv.4s d1, v0
-; CHECK-NEXT: mov.d v0[0], v1[0]
-; CHECK-NEXT: ucvtf.2d v0, v0
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: ucvtf s1, x8
+; CHECK-NEXT: mov.s v0[0], v1[0]
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
@@ -254,9 +255,14 @@ define void @insert_vec_v16i64_uaddlv_from_v4i16(ptr %0) {
; CHECK-NEXT: uaddlv.4h s1, v0
; CHECK-NEXT: stp q0, q0, [x0, #32]
; CHECK-NEXT: mov.s v2[0], v1[0]
-; CHECK-NEXT: ucvtf.2d v1, v2
-; CHECK-NEXT: fcvtn v1.2s, v1.2d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: fmov x8, d2
+; CHECK-NEXT: mov.d x9, v2[1]
+; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: ucvtf s1, x8
+; CHECK-NEXT: ucvtf s3, x9
+; CHECK-NEXT: mov.s v2[0], v1[0]
+; CHECK-NEXT: mov.s v2[1], v3[0]
+; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll
index 508f68d6f14d4..2b9e334cc7812 100644
--- a/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll
@@ -53,20 +53,27 @@ define <4 x half> @uitofp_v4i64_to_v4f16(ptr %ptr) {
define <4 x bfloat> @uitofp_v4i64_to_v4bf16(ptr %ptr) {
; CHECK-LABEL: uitofp_v4i64_to_v4bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ldp q0, q2, [x0]
+; CHECK-NEXT: mov x8, v0.d[1]
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: ucvtf s1, x9
+; CHECK-NEXT: mov x9, v2.d[1]
+; CHECK-NEXT: ucvtf s0, x8
+; CHECK-NEXT: fmov x8, d2
+; CHECK-NEXT: ucvtf s2, x8
+; CHECK-NEXT: mov v1.s[1], v0.s[0]
+; CHECK-NEXT: ucvtf s0, x9
+; CHECK-NEXT: mov v1.s[2], v2.s[0]
; CHECK-NEXT: movi v2.4s, #127, msl #8
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: ucvtf v1.2d, v1.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: ushr v3.4s, v0.4s, #16
-; CHECK-NEXT: add v2.4s, v0.4s, v2.4s
-; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
-; CHECK-NEXT: fcmeq v3.4s, v0.4s, v0.4s
-; CHECK-NEXT: orr v0.4s, #64, lsl #16
-; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT: bit v0.16b, v1.16b, v3.16b
+; CHECK-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-NEXT: movi v0.4s, #1
+; CHECK-NEXT: ushr v3.4s, v1.4s, #16
+; CHECK-NEXT: add v2.4s, v1.4s, v2.4s
+; CHECK-NEXT: and v0.16b, v3.16b, v0.16b
+; CHECK-NEXT: fcmeq v3.4s, v1.4s, v1.4s
+; CHECK-NEXT: orr v1.4s, #64, lsl #16
+; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: bif v0.16b, v1.16b, v3.16b
; CHECK-NEXT: shrn v0.4h, v0.4s, #16
; CHECK-NEXT: ret
%tmp1 = load <4 x i64>, ptr %ptr
diff --git a/llvm/test/CodeGen/AArch64/bf16-v4-instructions.ll b/llvm/test/CodeGen/AArch64/bf16-v4-instructions.ll
index 1cd0294b0083e..e185da3093645 100644
--- a/llvm/test/CodeGen/AArch64/bf16-v4-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-v4-instructions.ll
@@ -310,29 +310,43 @@ define <4 x bfloat> @sitofp_i32(<4 x i32> %a) #0 {
define <4 x bfloat> @sitofp_i64(<4 x i64> %a) #0 {
; CHECK-CVT-LABEL: sitofp_i64:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: scvtf v0.2d, v0.2d
-; CHECK-CVT-NEXT: scvtf v1.2d, v1.2d
-; CHECK-CVT-NEXT: movi v2.4s, #127, msl #8
-; CHECK-CVT-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-CVT-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-CVT-NEXT: movi v1.4s, #1
-; CHECK-CVT-NEXT: ushr v3.4s, v0.4s, #16
-; CHECK-CVT-NEXT: add v2.4s, v0.4s, v2.4s
-; CHECK-CVT-NEXT: and v1.16b, v3.16b, v1.16b
-; CHECK-CVT-NEXT: fcmeq v3.4s, v0.4s, v0.4s
-; CHECK-CVT-NEXT: orr v0.4s, #64, lsl #16
-; CHECK-CVT-NEXT: add v1.4s, v1.4s, v2.4s
-; CHECK-CVT-NEXT: bit v0.16b, v1.16b, v3.16b
+; CHECK-CVT-NEXT: mov x8, v0.d[1]
+; CHECK-CVT-NEXT: fmov x9, d0
+; CHECK-CVT-NEXT: scvtf s2, x9
+; CHECK-CVT-NEXT: mov x9, v1.d[1]
+; CHECK-CVT-NEXT: scvtf s0, x8
+; CHECK-CVT-NEXT: fmov x8, d1
+; CHECK-CVT-NEXT: scvtf s1, x8
+; CHECK-CVT-NEXT: mov v2.s[1], v0.s[0]
+; CHECK-CVT-NEXT: scvtf s0, x9
+; CHECK-CVT-NEXT: mov v2.s[2], v1.s[0]
+; CHECK-CVT-NEXT: movi v1.4s, #127, msl #8
+; CHECK-CVT-NEXT: mov v2.s[3], v0.s[0]
+; CHECK-CVT-NEXT: movi v0.4s, #1
+; CHECK-CVT-NEXT: ushr v3.4s, v2.4s, #16
+; CHECK-CVT-NEXT: add v1.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT: and v0.16b, v3.16b, v0.16b
+; CHECK-CVT-NEXT: fcmeq v3.4s, v2.4s, v2.4s
+; CHECK-CVT-NEXT: orr v2.4s, #64, lsl #16
+; CHECK-CVT-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: bif v0.16b, v2.16b, v3.16b
; CHECK-CVT-NEXT: shrn v0.4h, v0.4s, #16
; CHECK-CVT-NEXT: ret
;
; CHECK-BF16-LABEL: sitofp_i64:
; CHECK-BF16: // %bb.0:
-; CHECK-BF16-NEXT: scvtf v0.2d, v0.2d
-; CHECK-BF16-NEXT: scvtf v1.2d, v1.2d
-; CHECK-BF16-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-BF16-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-BF16-NEXT: bfcvtn v0.4h, v0.4s
+; CHECK-BF16-NEXT: mov x8, v0.d[1]
+; CHECK-BF16-NEXT: fmov x9, d0
+; CHECK-BF16-NEXT: scvtf s2, x9
+; CHECK-BF16-NEXT: mov x9, v1.d[1]
+; CHECK-BF16-NEXT: scvtf s0, x8
+; CHECK-BF16-NEXT: fmov x8, d1
+; CHECK-BF16-NEXT: mov v2.s[1], v0.s[0]
+; CHECK-BF16-NEXT: scvtf s0, x8
+; CHECK-BF16-NEXT: mov v2.s[2], v0.s[0]
+; CHECK-BF16-NEXT: scvtf s0, x9
+; CHECK-BF16-NEXT: mov v2.s[3], v0.s[0]
+; CHECK-BF16-NEXT: bfcvtn v0.4h, v2.4s
; CHECK-BF16-NEXT: ret
%1 = sitofp <4 x i64> %a to <4 x bfloat>
ret <4 x bfloat> %1
@@ -413,29 +427,43 @@ define <4 x bfloat> @uitofp_i32(<4 x i32> %a) #0 {
define <4 x bfloat> @uitofp_i64(<4 x i64> %a) #0 {
; CHECK-CVT-LABEL: uitofp_i64:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-CVT-NEXT: ucvtf v1.2d, v1.2d
-; CHECK-CVT-NEXT: movi v2.4s, #127, msl #8
-; CHECK-CVT-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-CVT-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-CVT-NEXT: movi v1.4s, #1
-; CHECK-CVT-NEXT: ushr v3.4s, v0.4s, #16
-; CHECK-CVT-NEXT: add v2.4s, v0.4s, v2.4s
-; CHECK-CVT-NEXT: and v1.16b, v3.16b, v1.16b
-; CHECK-CVT-NEXT: fcmeq v3.4s, v0.4s, v0.4s
-; CHECK-CVT-NEXT: orr v0.4s, #64, lsl #16
-; CHECK-CVT-NEXT: add v1.4s, v1.4s, v2.4s
-; CHECK-CVT-NEXT: bit v0.16b, v1.16b, v3.16b
+; CHECK-CVT-NEXT: mov x8, v0.d[1]
+; CHECK-CVT-NEXT: fmov x9, d0
+; CHECK-CVT-NEXT: ucvtf s2, x9
+; CHECK-CVT-NEXT: mov x9, v1.d[1]
+; CHECK-CVT-NEXT: ucvtf s0, x8
+; CHECK-CVT-NEXT: fmov x8, d1
+; CHECK-CVT-NEXT: ucvtf s1, x8
+; CHECK-CVT-NEXT: mov v2.s[1], v0.s[0]
+; CHECK-CVT-NEXT: ucvtf s0, x9
+; CHECK-CVT-NEXT: mov v2.s[2], v1.s[0]
+; CHECK-CVT-NEXT: movi v1.4s, #127, msl #8
+; CHECK-CVT-NEXT: mov v2.s[3], v0.s[0]
+; CHECK-CVT-NEXT: movi v0.4s, #1
+; CHECK-CVT-NEXT: ushr v3.4s, v2.4s, #16
+; CHECK-CVT-NEXT: add v1.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT: and v0.16b, v3.16b, v0.16b
+; CHECK-CVT-NEXT: fcmeq v3.4s, v2.4s, v2.4s
+; CHECK-CVT-NEXT: orr v2.4s, #64, lsl #16
+; CHECK-CVT-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: bif v0.16b, v2.16b, v3.16b
; CHECK-CVT-NEXT: shrn v0.4h, v0.4s, #16
; CHECK-CVT-NEXT: ret
;
; CHECK-BF16-LABEL: uitofp_i64:
; CHECK-BF16: // %bb.0:
-; CHECK-BF16-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-BF16-NEXT: ucvtf v1.2d, v1.2d
-; CHECK-BF16-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-BF16-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-BF16-NEXT: bfcvtn v0.4h, v0.4s
+; CHECK-BF16-NEXT: mov x8, v0.d[1]
+; CHECK-BF16-NEXT: fmov x9, d0
+; CHECK-BF16-NEXT: ucvtf s2, x9
+; CHECK-BF16-NEXT: mov x9, v1.d[1]
+; CHECK-BF16-NEXT: ucvtf s0, x8
+; CHECK-BF16-NEXT: fmov x8, d1
+; CHECK-BF16-NEXT: mov v2.s[1], v0.s[0]
+; CHECK-BF16-NEXT: ucvtf s0, x8
+; CHECK-BF16-NEXT: mov v2.s[2], v0.s[0]
+; CHECK-BF16-NEXT: ucvtf s0, x9
+; CHECK-BF16-NEXT: mov v2.s[3], v0.s[0]
+; CHECK-BF16-NEXT: bfcvtn v0.4h, v2.4s
; CHECK-BF16-NEXT: ret
%1 = uitofp <4 x i64> %a to <4 x bfloat>
ret <4 x bfloat> %1
diff --git a/llvm/test/CodeGen/AArch64/bf16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/bf16-v8-instructions.ll
index 2eaa58de92807..3a55b68f2d1a3 100644
--- a/llvm/test/CodeGen/AArch64/bf16-v8-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-v8-instructions.ll
@@ -489,45 +489,74 @@ define <8 x bfloat> @sitofp_i32(<8 x i32> %a) #0 {
define <8 x bfloat> @sitofp_i64(<8 x i64> %a) #0 {
; CHECK-CVT-LABEL: sitofp_i64:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: scvtf v2.2d, v2.2d
-; CHECK-CVT-NEXT: scvtf v0.2d, v0.2d
-; CHECK-CVT-NEXT: scvtf v3.2d, v3.2d
-; CHECK-CVT-NEXT: scvtf v1.2d, v1.2d
-; CHECK-CVT-NEXT: fcvtn v2.2s, v2.2d
-; CHECK-CVT-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-CVT-NEXT: fcvtn2 v2.4s, v3.2d
-; CHECK-CVT-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-CVT-NEXT: movi v1.4s, #1
-; CHECK-CVT-NEXT: movi v3.4s, #127, msl #8
-; CHECK-CVT-NEXT: ushr v4.4s, v2.4s, #16
-; CHECK-CVT-NEXT: ushr v5.4s, v0.4s, #16
-; CHECK-CVT-NEXT: add v6.4s, v2.4s, v3.4s
-; CHECK-CVT-NEXT: add v3.4s, v0.4s, v3.4s
-; CHECK-CVT-NEXT: and v4.16b, v4.16b, v1.16b
-; CHECK-CVT-NEXT: and v1.16b, v5.16b, v1.16b
+; CHECK-CVT-NEXT: fmov x10, d2
+; CHECK-CVT-NEXT: mov x8, v2.d[1]
+; CHECK-CVT-NEXT: mov x9, v0.d[1]
+; CHECK-CVT-NEXT: scvtf s2, x10
+; CHECK-CVT-NEXT: fmov x10, d0
+; CHECK-CVT-NEXT: scvtf s0, x8
+; CHECK-CVT-NEXT: scvtf s5, x9
+; CHECK-CVT-NEXT: fmov x9, d3
+; CHECK-CVT-NEXT: mov x8, v3.d[1]
+; CHECK-CVT-NEXT: scvtf s4, x10
+; CHECK-CVT-NEXT: fmov x10, d1
+; CHECK-CVT-NEXT: scvtf s3, x9
+; CHECK-CVT-NEXT: mov x9, v1.d[1]
+; CHECK-CVT-NEXT: mov v2.s[1], v0.s[0]
+; CHECK-CVT-NEXT: scvtf s0, x10
+; CHECK-CVT-NEXT: scvtf s1, x8
+; CHECK-CVT-NEXT: mov v4.s[1], v5.s[0]
+; CHECK-CVT-NEXT: mov v2.s[2], v3.s[0]
+; CHECK-CVT-NEXT: scvtf s3, x9
+; CHECK-CVT-NEXT: mov v4.s[2], v0.s[0]
+; CHECK-CVT-NEXT: movi v0.4s, #1
+; CHECK-CVT-NEXT: mov v2.s[3], v1.s[0]
+; CHECK-CVT-NEXT: movi v1.4s, #127, msl #8
+; CHECK-CVT-NEXT: mov v4.s[3], v3.s[0]
+; CHECK-CVT-NEXT: ushr v3.4s, v2.4s, #16
+; CHECK-CVT-NEXT: add v6.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT: ushr v5.4s, v4.4s, #16
+; CHECK-CVT-NEXT: add v1.4s, v4.4s, v1.4s
+; CHECK-CVT-NEXT: and v3.16b, v3.16b, v0.16b
+; CHECK-CVT-NEXT: and v0.16b, v5.16b, v0.16b
; CHECK-CVT-NEXT: fcmeq v5.4s, v2.4s, v2.4s
; CHECK-CVT-NEXT: orr v2.4s, #64, lsl #16
-; CHECK-CVT-NEXT: add v4.4s, v4.4s, v6.4s
-; CHECK-CVT-NEXT: fcmeq v6.4s, v0.4s, v0.4s
-; CHECK-CVT-NEXT: add v1.4s, v1.4s, v3.4s
-; CHECK-CVT-NEXT: orr v0.4s, #64, lsl #16
-; CHECK-CVT-NEXT: bit v2.16b, v4.16b, v5.16b
-; CHECK-CVT-NEXT: bit v0.16b, v1.16b, v6.16b
-; CHECK-CVT-NEXT: uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-NEXT: add v3.4s, v3.4s, v6.4s
+; CHECK-CVT-NEXT: fcmeq v6.4s, v4.4s, v4.4s
+; CHECK-CVT-NEXT: orr v4.4s, #64, lsl #16
+; CHECK-CVT-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: mov v1.16b, v5.16b
+; CHECK-CVT-NEXT: bif v0.16b, v4.16b, v6.16b
+; CHECK-CVT-NEXT: bsl v1.16b, v3.16b, v2.16b
+; CHECK-CVT-NEXT: uzp2 v0.8h, v0.8h, v1.8h
; CHECK-CVT-NEXT: ret
;
; CHECK-BF16-LABEL: sitofp_i64:
; CHECK-BF16: // %bb.0:
-; CHECK-BF16-NEXT: scvtf v0.2d, v0.2d
-; CHECK-BF16-NEXT: scvtf v2.2d, v2.2d
-; CHECK-BF16-NEXT: scvtf v1.2d, v1.2d
-; CHECK-BF16-NEXT: scvtf v3.2d, v3.2d
-; CHECK-BF16-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-BF16-NEXT: fcvtn v2.2s, v2.2d
-; CHECK-BF16-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-BF16-NEXT: fcvtn2 v2.4s, v3.2d
-; CHECK-BF16-NEXT: bfcvtn v0.4h, v0.4s
-; CHECK-BF16-NEXT: bfcvtn2 v0.8h, v2.4s
+; CHECK-BF16-NEXT: mov x9, v0.d[1]
+; CHECK-BF16-NEXT: fmov x10, d0
+; CHECK-BF16-NEXT: mov x8, v2.d[1]
+; CHECK-BF16-NEXT: scvtf s4, x10
+; CHECK-BF16-NEXT: fmov x10, d1
+; CHECK-BF16-NEXT: scvtf s0, x9
+; CHECK-BF16-NEXT: fmov x9, d2
+; CHECK-BF16-NEXT: scvtf s2, x8
+; CHECK-BF16-NEXT: mov x8, v1.d[1]
+; CHECK-BF16-NEXT: scvtf s1, x9
+; CHECK-BF16-NEXT: fmov x9, d3
+; CHECK-BF16-NEXT: mov v4.s[1], v0.s[0]
+; CHECK-BF16-NEXT: scvtf s0, x10
+; CHECK-BF16-NEXT: mov x10, v3.d[1]
+; CHECK-BF16-NEXT: scvtf s3, x9
+; CHECK-BF16-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-BF16-NEXT: scvtf s2, x8
+; CHECK-BF16-NEXT: mov v4.s[2], v0.s[0]
+; CHECK-BF16-NEXT: scvtf s0, x10
+; CHECK-BF16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-BF16-NEXT: mov v4.s[3], v2.s[0]
+; CHECK-BF16-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-BF16-NEXT: bfcvtn v0.4h, v4.4s
+; CHECK-BF16-NEXT: bfcvtn2 v0.8h, v1.4s
; CHECK-BF16-NEXT: ret
%1 = sitofp <8 x i64> %a to <8 x bfloat>
ret <8 x bfloat> %1
@@ -712,45 +741,74 @@ define <8 x bfloat> @uitofp_i32(<8 x i32> %a) #0 {
define <8 x bfloat> @uitofp_i64(<8 x i64> %a) #0 {
; CHECK-CVT-LABEL: uitofp_i64:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: ucvtf v2.2d, v2.2d
-; CHECK-CVT-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-CVT-NEXT: ucvtf v3.2d, v3.2d
-; CHECK-CVT-NEXT: ucvtf v1.2d, v1.2d
-; CHECK-CVT-NEXT: fcvtn v2.2s, v2.2d
-; CHECK-CVT-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-CVT-NEXT: fcvtn2 v2.4s, v3.2d
-; CHECK-CVT-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-CVT-NEXT: movi v1.4s, #1
-; CHECK-CVT-NEXT: movi v3.4s, #127, msl #8
-; CHECK-CVT-NEXT: ushr v4.4s, v2.4s, #16
-; CHECK-CVT-NEXT: ushr v5.4s, v0.4s, #16
-; CHECK-CVT-NEXT: add v6.4s, v2.4s, v3.4s
-; CHECK-CVT-NEXT: add v3.4s, v0.4s, v3.4s
-; CHECK-CVT-NEXT: and v4.16b, v4.16b, v1.16b
-; CHECK-CVT-NEXT: and v1.16b, v5.16b, v1.16b
+; CHECK-CVT-NEXT: fmov x10, d2
+; CHECK-CVT-NEXT: mov x8, v2.d[1]
+; CHECK-CVT-NEXT: mov x9, v0.d[1]
+; CHECK-CVT-NEXT: ucvtf s2, x10
+; CHECK-CVT-NEXT: fmov x10, d0
+; CHECK-CVT-NEXT: ucvtf s0, x8
+; CHECK-CVT-NEXT: ucvtf s5, x9
+; CHECK-CVT-NEXT: fmov x9, d3
+; CHECK-CVT-NEXT: mov x8, v3.d[1]
+; CHECK-CVT-NEXT: ucvtf s4, x10
+; CHECK-CVT-NEXT: fmov x10, d1
+; CHECK-CVT-NEXT: ucvtf s3, x9
+; CHECK-CVT-NEXT: mov x9, v1.d[1]
+; CHECK-CVT-NEXT: mov v2.s[1], v0.s[0]
+; CHECK-CVT-NEXT: ucvtf s0, x10
+; CHECK-CVT-NEXT: ucvtf s1, x8
+; CHECK-CVT-NEXT: mov v4.s[1], v5.s[0]
+; CHECK-CVT-NEXT: mov v2.s[2], v3.s[0]
+; CHECK-CVT-NEXT: ucvtf s3, x9
+; CHECK-CVT-NEXT: mov v4.s[2], v0.s[0]
+; CHECK-CVT-NEXT: movi v0.4s, #1
+; CHECK-CVT-NEXT: mov v2.s[3], v1.s[0]
+; CHECK-CVT-NEXT: movi v1.4s, #127, msl #8
+; CHECK-CVT-NEXT: mov v4.s[3], v3.s[0]
+; CHECK-CVT-NEXT: ushr v3.4s, v2.4s, #16
+; CHECK-CVT-NEXT: add v6.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT: ushr v5.4s, v4.4s, #16
+; CHECK-CVT-NEXT: add v1.4s, v4.4s, v1.4s
+; CHECK-CVT-NEXT: and v3.16b, v3.16b, v0.16b
+; CHECK-CVT-NEXT: and v0.16b, v5.16b, v0.16b
; CHECK-CVT-NEXT: fcmeq v5.4s, v2.4s, v2.4s
; CHECK-CVT-NEXT: orr v2.4s, #64, lsl #16
-; CHECK-CVT-NEXT: add v4.4s, v4.4s, v6.4s
-; CHECK-CVT-NEXT: fcmeq v6.4s, v0.4s, v0.4s
-; CHECK-CVT-NEXT: add v1.4s, v1.4s, v3.4s
-; CHECK-CVT-NEXT: orr v0.4s, #64, lsl #16
-; CHECK-CVT-NEXT: bit v2.16b, v4.16b, v5.16b
-; CHECK-CVT-NEXT: bit v0.16b, v1.16b, v6.16b
-; CHECK-CVT-NEXT: uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-NEXT: add v3.4s, v3.4s, v6.4s
+; CHECK-CVT-NEXT: fcmeq v6.4s, v4.4s, v4.4s
+; CHECK-CVT-NEXT: orr v4.4s, #64, lsl #16
+; CHECK-CVT-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: mov v1.16b, v5.16b
+; CHECK-CVT-NEXT: bif v0.16b, v4.16b, v6.16b
+; CHECK-CVT-NEXT: bsl v1.16b, v3.16b, v2.16b
+; CHECK-CVT-NEXT: uzp2 v0.8h, v0.8h, v1.8h
; CHECK-CVT-NEXT: ret
;
; CHECK-BF16-LABEL: uitofp_i64:
; CHECK-BF16: // %bb.0:
-; CHECK-BF16-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-BF16-NEXT: ucvtf v2.2d, v2.2d
-; CHECK-BF16-NEXT: ucvtf v1.2d, v1.2d
-; CHECK-BF16-NEXT: ucvtf v3.2d, v3.2d
-; CHECK-BF16-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-BF16-NEXT: fcvtn v2.2s, v2.2d
-; CHECK-BF16-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-BF16-NEXT: fcvtn2 v2.4s, v3.2d
-; CHECK-BF16-NEXT: bfcvtn v0.4h, v0.4s
-; CHECK-BF16-NEXT: bfcvtn2 v0.8h, v2.4s
+; CHECK-BF16-NEXT: mov x9, v0.d[1]
+; CHECK-BF16-NEXT: fmov x10, d0
+; CHECK-BF16-NEXT: mov x8, v2.d[1]
+; CHECK-BF16-NEXT: ucvtf s4, x10
+; CHECK-BF16-NEXT: fmov x10, d1
+; CHECK-BF16-NEXT: ucvtf s0, x9
+; CHECK-BF16-NEXT: fmov x9, d2
+; CHECK-BF16-NEXT: ucvtf s2, x8
+; CHECK-BF16-NEXT: mov x8, v1.d[1]
+; CHECK-BF16-NEXT: ucvtf s1, x9
+; CHECK-BF16-NEXT: fmov x9, d3
+; CHECK-BF16-NEXT: mov v4.s[1], v0.s[0]
+; CHECK-BF16-NEXT: ucvtf s0, x10
+; CHECK-BF16-NEXT: mov x10, v3.d[1]
+; CHECK-BF16-NEXT: ucvtf s3, x9
+; CHECK-BF16-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-BF16-NEXT: ucvtf s2, x8
+; CHECK-BF16-NEXT: mov v4.s[2], v0.s[0]
+; CHECK-BF16-NEXT: ucvtf s0, x10
+; CHECK-BF16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-BF16-NEXT: mov v4.s[3], v2.s[0]
+; CHECK-BF16-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-BF16-NEXT: bfcvtn v0.4h, v4.4s
+; CHECK-BF16-NEXT: bfcvtn2 v0.8h, v1.4s
; CHECK-BF16-NEXT: ret
%1 = uitofp <8 x i64> %a to <8 x bfloat>
ret <8 x bfloat> %1
diff --git a/llvm/test/CodeGen/AArch64/complex-int-to-fp.ll b/llvm/test/CodeGen/AArch64/complex-int-to-fp.ll
index ec504b4782547..baca159f9dd55 100644
--- a/llvm/test/CodeGen/AArch64/complex-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/complex-int-to-fp.ll
@@ -5,9 +5,12 @@ define void @autogen_SD19655(ptr %addr, ptr %addrfloat) {
; CHECK-LABEL: autogen_SD19655:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: scvtf.2d v0, v0
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: str d0, [x1]
+; CHECK-NEXT: mov.d x8, v0[1]
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: scvtf s1, x9
+; CHECK-NEXT: scvtf s0, x8
+; CHECK-NEXT: mov.s v1[1], v0[0]
+; CHECK-NEXT: str d1, [x1]
; CHECK-NEXT: ret
%T = load <2 x i64>, ptr %addr
%F = sitofp <2 x i64> %T to <2 x float>
@@ -88,8 +91,12 @@ define <2 x double> @test_unsigned_v2i8_to_v2f64(<2 x i8> %v) nounwind readnone
define <2 x float> @test_signed_v2i64_to_v2f32(<2 x i64> %v) nounwind readnone {
; CHECK-LABEL: test_signed_v2i64_to_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: scvtf.2d v0, v0
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: mov.d x8, v0[1]
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: scvtf s0, x9
+; CHECK-NEXT: scvtf s1, x8
+; CHECK-NEXT: mov.s v0[1], v1[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%conv = sitofp <2 x i64> %v to <2 x float>
@@ -98,8 +105,12 @@ define <2 x float> @test_signed_v2i64_to_v2f32(<2 x i64> %v) nounwind readnone {
define <2 x float> @test_unsigned_v2i64_to_v2f32(<2 x i64> %v) nounwind readnone {
; CHECK-LABEL: test_unsigned_v2i64_to_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ucvtf.2d v0, v0
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: mov.d x8, v0[1]
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: ucvtf s0, x9
+; CHECK-NEXT: ucvtf s1, x8
+; CHECK-NEXT: mov.s v0[1], v1[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%conv = uitofp <2 x i64> %v to <2 x float>
diff --git a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
index b40c0656a60e4..b65334e2461fd 100644
--- a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
+++ b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -262,10 +262,13 @@ define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nou
; CHECK-NEON-NEXT: mov w8, #2 // =0x2
; CHECK-NEON-NEXT: dup v1.2d, x8
; CHECK-NEON-NEXT: ushl v0.2d, v1.2d, v0.2d
-; CHECK-NEON-NEXT: fmov v1.2s, #15.00000000
-; CHECK-NEON-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEON-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEON-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-NEON-NEXT: mov x8, v0.d[1]
+; CHECK-NEON-NEXT: fmov x9, d0
+; CHECK-NEON-NEXT: ucvtf s1, x9
+; CHECK-NEON-NEXT: ucvtf s0, x8
+; CHECK-NEON-NEXT: mov v1.s[1], v0.s[0]
+; CHECK-NEON-NEXT: fmov v0.2s, #15.00000000
+; CHECK-NEON-NEXT: fmul v0.2s, v1.2s, v0.2s
; CHECK-NEON-NEXT: ret
;
; CHECK-NO-NEON-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
index 83e60c1089762..1364c47adff2d 100644
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
@@ -193,10 +193,17 @@ define <4 x float> @uitofp_v4f32_v4i32(<4 x i32> %x) #0 {
define <4 x float> @sitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; CHECK-LABEL: sitofp_v4f32_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: scvtf v1.2d, v1.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-NEXT: mov x8, v0.d[1]
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: scvtf s0, x9
+; CHECK-NEXT: mov x9, v1.d[1]
+; CHECK-NEXT: scvtf s2, x8
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: scvtf s1, x8
+; CHECK-NEXT: mov v0.s[1], v2.s[0]
+; CHECK-NEXT: mov v0.s[2], v1.s[0]
+; CHECK-NEXT: scvtf s1, x9
+; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: ret
%val = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
ret <4 x float> %val
@@ -205,10 +212,38 @@ define <4 x float> @sitofp_v4f32_v4i64(<4 x i64> %x) #0 {
define <4 x float> @uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; CHECK-LABEL: uitofp_v4f32_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: ucvtf v1.2d, v1.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-NEXT: movi v2.2d, #0x000000ffffffff
+; CHECK-NEXT: ushr v3.2d, v1.2d, #32
+; CHECK-NEXT: ushr v4.2d, v0.2d, #32
+; CHECK-NEXT: mov x8, v3.d[1]
+; CHECK-NEXT: mov x9, v4.d[1]
+; CHECK-NEXT: fmov x10, d3
+; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: fmov x11, d4
+; CHECK-NEXT: scvtf s2, x10
+; CHECK-NEXT: mov x10, v1.d[1]
+; CHECK-NEXT: scvtf s3, x8
+; CHECK-NEXT: scvtf s4, x11
+; CHECK-NEXT: mov x8, v0.d[1]
+; CHECK-NEXT: scvtf s5, x9
+; CHECK-NEXT: mov w9, #1333788672 // =0x4f800000
+; CHECK-NEXT: fmov x11, d1
+; CHECK-NEXT: dup v1.2s, w9
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: scvtf s0, x10
+; CHECK-NEXT: mov v2.s[1], v3.s[0]
+; CHECK-NEXT: scvtf s6, x11
+; CHECK-NEXT: scvtf s3, x8
+; CHECK-NEXT: mov v4.s[1], v5.s[0]
+; CHECK-NEXT: scvtf s5, x9
+; CHECK-NEXT: mov v6.s[1], v0.s[0]
+; CHECK-NEXT: fmul v0.2s, v2.2s, v1.2s
+; CHECK-NEXT: fmul v1.2s, v4.2s, v1.2s
+; CHECK-NEXT: mov v5.s[1], v3.s[0]
+; CHECK-NEXT: fadd v2.2s, v0.2s, v6.2s
+; CHECK-NEXT: fadd v0.2s, v1.2s, v5.2s
+; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: ret
%val = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
ret <4 x float> %val
diff --git a/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll b/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
index 0a7319b9ce11e..9da6f583cec01 100644
--- a/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
+++ b/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
@@ -210,15 +210,20 @@ define <1 x float> @scvtf_f32i64_simple(<1 x i64> %x) {
; CHECK-LABEL: scvtf_f32i64_simple:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: movi d1, #0000000000000000
+; CHECK-NEXT: scvtf s0, d0
+; CHECK-NEXT: mov v1.s[0], v0.s[0]
+; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: scvtf_f32i64_simple:
; CHECK-NO-FPRCVT: // %bb.0:
; CHECK-NO-FPRCVT-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NO-FPRCVT-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NO-FPRCVT-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NO-FPRCVT-NEXT: fmov x8, d0
+; CHECK-NO-FPRCVT-NEXT: movi d1, #0000000000000000
+; CHECK-NO-FPRCVT-NEXT: scvtf s0, x8
+; CHECK-NO-FPRCVT-NEXT: mov v1.s[0], v0.s[0]
+; CHECK-NO-FPRCVT-NEXT: fmov d0, d1
; CHECK-NO-FPRCVT-NEXT: ret
%conv = sitofp <1 x i64> %x to <1 x float>
ret <1 x float> %conv
@@ -426,15 +431,20 @@ define <1 x float> @ucvtf_f32i64_simple(<1 x i64> %x) {
; CHECK-LABEL: ucvtf_f32i64_simple:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: movi d1, #0000000000000000
+; CHECK-NEXT: ucvtf s0, d0
+; CHECK-NEXT: mov v1.s[0], v0.s[0]
+; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: ucvtf_f32i64_simple:
; CHECK-NO-FPRCVT: // %bb.0:
; CHECK-NO-FPRCVT-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NO-FPRCVT-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NO-FPRCVT-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NO-FPRCVT-NEXT: fmov x8, d0
+; CHECK-NO-FPRCVT-NEXT: movi d1, #0000000000000000
+; CHECK-NO-FPRCVT-NEXT: ucvtf s0, x8
+; CHECK-NO-FPRCVT-NEXT: mov v1.s[0], v0.s[0]
+; CHECK-NO-FPRCVT-NEXT: fmov d0, d1
; CHECK-NO-FPRCVT-NEXT: ret
%conv = uitofp <1 x i64> %x to <1 x float>
ret <1 x float> %conv
diff --git a/llvm/test/CodeGen/AArch64/itofp-bf16.ll b/llvm/test/CodeGen/AArch64/itofp-bf16.ll
index 58591b11c184f..42641693c4081 100644
--- a/llvm/test/CodeGen/AArch64/itofp-bf16.ll
+++ b/llvm/test/CodeGen/AArch64/itofp-bf16.ll
@@ -349,22 +349,27 @@ define <3 x bfloat> @stofp_v3i64_v3bf16(<3 x i64> %a) {
; CHECK-LABEL: stofp_v3i64_v3bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: scvtf v1.2d, v2.2d
-; CHECK-NEXT: movi v2.4s, #127, msl #8
-; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: ushr v3.4s, v0.4s, #16
-; CHECK-NEXT: add v2.4s, v0.4s, v2.4s
-; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
-; CHECK-NEXT: fcmeq v3.4s, v0.4s, v0.4s
-; CHECK-NEXT: orr v0.4s, #64, lsl #16
-; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT: bit v0.16b, v1.16b, v3.16b
+; CHECK-NEXT: scvtf s0, x8
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: scvtf s1, x8
+; CHECK-NEXT: fmov x8, d2
+; CHECK-NEXT: mov v3.s[0], v0.s[0]
+; CHECK-NEXT: scvtf s0, x8
+; CHECK-NEXT: mov v3.s[1], v1.s[0]
+; CHECK-NEXT: movi v1.4s, #127, msl #8
+; CHECK-NEXT: mov v3.s[2], v0.s[0]
+; CHECK-NEXT: movi v0.4s, #1
+; CHECK-NEXT: ushr v2.4s, v3.4s, #16
+; CHECK-NEXT: add v1.4s, v3.4s, v1.4s
+; CHECK-NEXT: and v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: fcmeq v2.4s, v3.4s, v3.4s
+; CHECK-NEXT: orr v3.4s, #64, lsl #16
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: bif v0.16b, v3.16b, v2.16b
; CHECK-NEXT: shrn v0.4h, v0.4s, #16
; CHECK-NEXT: ret
entry:
@@ -376,22 +381,27 @@ define <3 x bfloat> @utofp_v3i64_v3bf16(<3 x i64> %a) {
; CHECK-LABEL: utofp_v3i64_v3bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: ucvtf v1.2d, v2.2d
-; CHECK-NEXT: movi v2.4s, #127, msl #8
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: ushr v3.4s, v0.4s, #16
-; CHECK-NEXT: add v2.4s, v0.4s, v2.4s
-; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
-; CHECK-NEXT: fcmeq v3.4s, v0.4s, v0.4s
-; CHECK-NEXT: orr v0.4s, #64, lsl #16
-; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT: bit v0.16b, v1.16b, v3.16b
+; CHECK-NEXT: ucvtf s0, x8
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: ucvtf s1, x8
+; CHECK-NEXT: fmov x8, d2
+; CHECK-NEXT: mov v3.s[0], v0.s[0]
+; CHECK-NEXT: ucvtf s0, x8
+; CHECK-NEXT: mov v3.s[1], v1.s[0]
+; CHECK-NEXT: movi v1.4s, #127, msl #8
+; CHECK-NEXT: mov v3.s[2], v0.s[0]
+; CHECK-NEXT: movi v0.4s, #1
+; CHECK-NEXT: ushr v2.4s, v3.4s, #16
+; CHECK-NEXT: add v1.4s, v3.4s, v1.4s
+; CHECK-NEXT: and v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: fcmeq v2.4s, v3.4s, v3.4s
+; CHECK-NEXT: orr v3.4s, #64, lsl #16
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: bif v0.16b, v3.16b, v2.16b
; CHECK-NEXT: shrn v0.4h, v0.4s, #16
; CHECK-NEXT: ret
entry:
@@ -402,19 +412,26 @@ entry:
define <4 x bfloat> @stofp_v4i64_v4bf16(<4 x i64> %a) {
; CHECK-LABEL: stofp_v4i64_v4bf16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: scvtf v1.2d, v1.2d
-; CHECK-NEXT: movi v2.4s, #127, msl #8
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: ushr v3.4s, v0.4s, #16
-; CHECK-NEXT: add v2.4s, v0.4s, v2.4s
-; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
-; CHECK-NEXT: fcmeq v3.4s, v0.4s, v0.4s
-; CHECK-NEXT: orr v0.4s, #64, lsl #16
-; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT: bit v0.16b, v1.16b, v3.16b
+; CHECK-NEXT: mov x8, v0.d[1]
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: scvtf s2, x9
+; CHECK-NEXT: mov x9, v1.d[1]
+; CHECK-NEXT: scvtf s0, x8
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: scvtf s1, x8
+; CHECK-NEXT: mov v2.s[1], v0.s[0]
+; CHECK-NEXT: scvtf s0, x9
+; CHECK-NEXT: mov v2.s[2], v1.s[0]
+; CHECK-NEXT: movi v1.4s, #127, msl #8
+; CHECK-NEXT: mov v2.s[3], v0.s[0]
+; CHECK-NEXT: movi v0.4s, #1
+; CHECK-NEXT: ushr v3.4s, v2.4s, #16
+; CHECK-NEXT: add v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: and v0.16b, v3.16b, v0.16b
+; CHECK-NEXT: fcmeq v3.4s, v2.4s, v2.4s
+; CHECK-NEXT: orr v2.4s, #64, lsl #16
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: bif v0.16b, v2.16b, v3.16b
; CHECK-NEXT: shrn v0.4h, v0.4s, #16
; CHECK-NEXT: ret
entry:
@@ -425,19 +442,26 @@ entry:
define <4 x bfloat> @utofp_v4i64_v4bf16(<4 x i64> %a) {
; CHECK-LABEL: utofp_v4i64_v4bf16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: ucvtf v1.2d, v1.2d
-; CHECK-NEXT: movi v2.4s, #127, msl #8
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: ushr v3.4s, v0.4s, #16
-; CHECK-NEXT: add v2.4s, v0.4s, v2.4s
-; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
-; CHECK-NEXT: fcmeq v3.4s, v0.4s, v0.4s
-; CHECK-NEXT: orr v0.4s, #64, lsl #16
-; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT: bit v0.16b, v1.16b, v3.16b
+; CHECK-NEXT: mov x8, v0.d[1]
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: ucvtf s2, x9
+; CHECK-NEXT: mov x9, v1.d[1]
+; CHECK-NEXT: ucvtf s0, x8
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: ucvtf s1, x8
+; CHECK-NEXT: mov v2.s[1], v0.s[0]
+; CHECK-NEXT: ucvtf s0, x9
+; CHECK-NEXT: mov v2.s[2], v1.s[0]
+; CHECK-NEXT: movi v1.4s, #127, msl #8
+; CHECK-NEXT: mov v2.s[3], v0.s[0]
+; CHECK-NEXT: movi v0.4s, #1
+; CHECK-NEXT: ushr v3.4s, v2.4s, #16
+; CHECK-NEXT: add v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: and v0.16b, v3.16b, v0.16b
+; CHECK-NEXT: fcmeq v3.4s, v2.4s, v2.4s
+; CHECK-NEXT: orr v2.4s, #64, lsl #16
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: bif v0.16b, v2.16b, v3.16b
; CHECK-NEXT: shrn v0.4h, v0.4s, #16
; CHECK-NEXT: ret
entry:
@@ -448,31 +472,46 @@ entry:
define <8 x bfloat> @stofp_v8i64_v8bf16(<8 x i64> %a) {
; CHECK-LABEL: stofp_v8i64_v8bf16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: scvtf v2.2d, v2.2d
-; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: scvtf v3.2d, v3.2d
-; CHECK-NEXT: scvtf v1.2d, v1.2d
-; CHECK-NEXT: fcvtn v2.2s, v2.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn2 v2.4s, v3.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: movi v3.4s, #127, msl #8
-; CHECK-NEXT: ushr v4.4s, v2.4s, #16
-; CHECK-NEXT: ushr v5.4s, v0.4s, #16
-; CHECK-NEXT: add v6.4s, v2.4s, v3.4s
-; CHECK-NEXT: add v3.4s, v0.4s, v3.4s
-; CHECK-NEXT: and v4.16b, v4.16b, v1.16b
-; CHECK-NEXT: and v1.16b, v5.16b, v1.16b
+; CHECK-NEXT: fmov x10, d2
+; CHECK-NEXT: mov x8, v2.d[1]
+; CHECK-NEXT: mov x9, v0.d[1]
+; CHECK-NEXT: scvtf s2, x10
+; CHECK-NEXT: fmov x10, d0
+; CHECK-NEXT: scvtf s0, x8
+; CHECK-NEXT: scvtf s5, x9
+; CHECK-NEXT: fmov x9, d3
+; CHECK-NEXT: mov x8, v3.d[1]
+; CHECK-NEXT: scvtf s4, x10
+; CHECK-NEXT: fmov x10, d1
+; CHECK-NEXT: scvtf s3, x9
+; CHECK-NEXT: mov x9, v1.d[1]
+; CHECK-NEXT: mov v2.s[1], v0.s[0]
+; CHECK-NEXT: scvtf s0, x10
+; CHECK-NEXT: scvtf s1, x8
+; CHECK-NEXT: mov v4.s[1], v5.s[0]
+; CHECK-NEXT: mov v2.s[2], v3.s[0]
+; CHECK-NEXT: scvtf s3, x9
+; CHECK-NEXT: mov v4.s[2], v0.s[0]
+; CHECK-NEXT: movi v0.4s, #1
+; CHECK-NEXT: mov v2.s[3], v1.s[0]
+; CHECK-NEXT: movi v1.4s, #127, msl #8
+; CHECK-NEXT: mov v4.s[3], v3.s[0]
+; CHECK-NEXT: ushr v3.4s, v2.4s, #16
+; CHECK-NEXT: add v6.4s, v2.4s, v1.4s
+; CHECK-NEXT: ushr v5.4s, v4.4s, #16
+; CHECK-NEXT: add v1.4s, v4.4s, v1.4s
+; CHECK-NEXT: and v3.16b, v3.16b, v0.16b
+; CHECK-NEXT: and v0.16b, v5.16b, v0.16b
; CHECK-NEXT: fcmeq v5.4s, v2.4s, v2.4s
; CHECK-NEXT: orr v2.4s, #64, lsl #16
-; CHECK-NEXT: add v4.4s, v4.4s, v6.4s
-; CHECK-NEXT: fcmeq v6.4s, v0.4s, v0.4s
-; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
-; CHECK-NEXT: orr v0.4s, #64, lsl #16
-; CHECK-NEXT: bit v2.16b, v4.16b, v5.16b
-; CHECK-NEXT: bit v0.16b, v1.16b, v6.16b
-; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: add v3.4s, v3.4s, v6.4s
+; CHECK-NEXT: fcmeq v6.4s, v4.4s, v4.4s
+; CHECK-NEXT: orr v4.4s, #64, lsl #16
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: mov v1.16b, v5.16b
+; CHECK-NEXT: bif v0.16b, v4.16b, v6.16b
+; CHECK-NEXT: bsl v1.16b, v3.16b, v2.16b
+; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
entry:
%c = sitofp <8 x i64> %a to <8 x bfloat>
@@ -482,31 +521,46 @@ entry:
define <8 x bfloat> @utofp_v8i64_v8bf16(<8 x i64> %a) {
; CHECK-LABEL: utofp_v8i64_v8bf16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ucvtf v2.2d, v2.2d
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: ucvtf v3.2d, v3.2d
-; CHECK-NEXT: ucvtf v1.2d, v1.2d
-; CHECK-NEXT: fcvtn v2.2s, v2.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn2 v2.4s, v3.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: movi v3.4s, #127, msl #8
-; CHECK-NEXT: ushr v4.4s, v2.4s, #16
-; CHECK-NEXT: ushr v5.4s, v0.4s, #16
-; CHECK-NEXT: add v6.4s, v2.4s, v3.4s
-; CHECK-NEXT: add v3.4s, v0.4s, v3.4s
-; CHECK-NEXT: and v4.16b, v4.16b, v1.16b
-; CHECK-NEXT: and v1.16b, v5.16b, v1.16b
+; CHECK-NEXT: fmov x10, d2
+; CHECK-NEXT: mov x8, v2.d[1]
+; CHECK-NEXT: mov x9, v0.d[1]
+; CHECK-NEXT: ucvtf s2, x10
+; CHECK-NEXT: fmov x10, d0
+; CHECK-NEXT: ucvtf s0, x8
+; CHECK-NEXT: ucvtf s5, x9
+; CHECK-NEXT: fmov x9, d3
+; CHECK-NEXT: mov x8, v3.d[1]
+; CHECK-NEXT: ucvtf s4, x10
+; CHECK-NEXT: fmov x10, d1
+; CHECK-NEXT: ucvtf s3, x9
+; CHECK-NEXT: mov x9, v1.d[1]
+; CHECK-NEXT: mov v2.s[1], v0.s[0]
+; CHECK-NEXT: ucvtf s0, x10
+; CHECK-NEXT: ucvtf s1, x8
+; CHECK-NEXT: mov v4.s[1], v5.s[0]
+; CHECK-NEXT: mov v2.s[2], v3.s[0]
+; CHECK-NEXT: ucvtf s3, x9
+; CHECK-NEXT: mov v4.s[2], v0.s[0]
+; CHECK-NEXT: movi v0.4s, #1
+; CHECK-NEXT: mov v2.s[3], v1.s[0]
+; CHECK-NEXT: movi v1.4s, #127, msl #8
+; CHECK-NEXT: mov v4.s[3], v3.s[0]
+; CHECK-NEXT: ushr v3.4s, v2.4s, #16
+; CHECK-NEXT: add v6.4s, v2.4s, v1.4s
+; CHECK-NEXT: ushr v5.4s, v4.4s, #16
+; CHECK-NEXT: add v1.4s, v4.4s, v1.4s
+; CHECK-NEXT: and v3.16b, v3.16b, v0.16b
+; CHECK-NEXT: and v0.16b, v5.16b, v0.16b
; CHECK-NEXT: fcmeq v5.4s, v2.4s, v2.4s
; CHECK-NEXT: orr v2.4s, #64, lsl #16
-; CHECK-NEXT: add v4.4s, v4.4s, v6.4s
-; CHECK-NEXT: fcmeq v6.4s, v0.4s, v0.4s
-; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
-; CHECK-NEXT: orr v0.4s, #64, lsl #16
-; CHECK-NEXT: bit v2.16b, v4.16b, v5.16b
-; CHECK-NEXT: bit v0.16b, v1.16b, v6.16b
-; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: add v3.4s, v3.4s, v6.4s
+; CHECK-NEXT: fcmeq v6.4s, v4.4s, v4.4s
+; CHECK-NEXT: orr v4.4s, #64, lsl #16
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: mov v1.16b, v5.16b
+; CHECK-NEXT: bif v0.16b, v4.16b, v6.16b
+; CHECK-NEXT: bsl v1.16b, v3.16b, v2.16b
+; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
entry:
%c = uitofp <8 x i64> %a to <8 x bfloat>
@@ -516,55 +570,82 @@ entry:
define <16 x bfloat> @stofp_v16i64_v16bf16(<16 x i64> %a) {
; CHECK-LABEL: stofp_v16i64_v16bf16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: scvtf v2.2d, v2.2d
-; CHECK-NEXT: scvtf v6.2d, v6.2d
-; CHECK-NEXT: scvtf v4.2d, v4.2d
-; CHECK-NEXT: scvtf v1.2d, v1.2d
-; CHECK-NEXT: scvtf v3.2d, v3.2d
-; CHECK-NEXT: scvtf v7.2d, v7.2d
-; CHECK-NEXT: scvtf v5.2d, v5.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn v2.2s, v2.2d
-; CHECK-NEXT: fcvtn v6.2s, v6.2d
-; CHECK-NEXT: fcvtn v4.2s, v4.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT: fcvtn2 v2.4s, v3.2d
-; CHECK-NEXT: fcvtn2 v6.4s, v7.2d
-; CHECK-NEXT: fcvtn2 v4.4s, v5.2d
+; CHECK-NEXT: mov x8, v2.d[1]
+; CHECK-NEXT: fmov x11, d0
+; CHECK-NEXT: mov x9, v0.d[1]
+; CHECK-NEXT: fmov x10, d2
+; CHECK-NEXT: mov x12, v6.d[1]
+; CHECK-NEXT: scvtf s2, x11
+; CHECK-NEXT: fmov x11, d3
+; CHECK-NEXT: scvtf s16, x8
+; CHECK-NEXT: fmov x8, d6
+; CHECK-NEXT: scvtf s0, x10
+; CHECK-NEXT: mov x10, v4.d[1]
+; CHECK-NEXT: scvtf s17, x9
+; CHECK-NEXT: mov x9, v3.d[1]
+; CHECK-NEXT: scvtf s6, x12
+; CHECK-NEXT: fmov x12, d4
+; CHECK-NEXT: scvtf s4, x11
+; CHECK-NEXT: scvtf s3, x8
+; CHECK-NEXT: fmov x11, d7
+; CHECK-NEXT: mov x8, v1.d[1]
+; CHECK-NEXT: mov v0.s[1], v16.s[0]
+; CHECK-NEXT: scvtf s18, x10
+; CHECK-NEXT: scvtf s19, x12
+; CHECK-NEXT: fmov x10, d1
+; CHECK-NEXT: mov v2.s[1], v17.s[0]
+; CHECK-NEXT: mov x12, v5.d[1]
+; CHECK-NEXT: mov v3.s[1], v6.s[0]
+; CHECK-NEXT: scvtf s6, x11
+; CHECK-NEXT: fmov x11, d5
+; CHECK-NEXT: scvtf s1, x10
+; CHECK-NEXT: mov x10, v7.d[1]
+; CHECK-NEXT: scvtf s7, x9
+; CHECK-NEXT: mov v19.s[1], v18.s[0]
+; CHECK-NEXT: scvtf s16, x8
+; CHECK-NEXT: mov v0.s[2], v4.s[0]
+; CHECK-NEXT: scvtf s5, x11
+; CHECK-NEXT: mov v3.s[2], v6.s[0]
+; CHECK-NEXT: scvtf s4, x10
+; CHECK-NEXT: mov v2.s[2], v1.s[0]
+; CHECK-NEXT: scvtf s1, x12
+; CHECK-NEXT: mov v0.s[3], v7.s[0]
+; CHECK-NEXT: mov v19.s[2], v5.s[0]
+; CHECK-NEXT: mov v2.s[3], v16.s[0]
+; CHECK-NEXT: mov v3.s[3], v4.s[0]
+; CHECK-NEXT: movi v4.4s, #127, msl #8
+; CHECK-NEXT: ushr v5.4s, v0.4s, #16
+; CHECK-NEXT: mov v19.s[3], v1.s[0]
; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: movi v3.4s, #127, msl #8
-; CHECK-NEXT: ushr v7.4s, v0.4s, #16
-; CHECK-NEXT: ushr v5.4s, v2.4s, #16
-; CHECK-NEXT: ushr v16.4s, v6.4s, #16
-; CHECK-NEXT: ushr v17.4s, v4.4s, #16
-; CHECK-NEXT: add v19.4s, v0.4s, v3.4s
-; CHECK-NEXT: add v18.4s, v2.4s, v3.4s
-; CHECK-NEXT: add v20.4s, v6.4s, v3.4s
-; CHECK-NEXT: add v3.4s, v4.4s, v3.4s
-; CHECK-NEXT: and v7.16b, v7.16b, v1.16b
+; CHECK-NEXT: ushr v6.4s, v2.4s, #16
+; CHECK-NEXT: ushr v7.4s, v3.4s, #16
+; CHECK-NEXT: add v17.4s, v0.4s, v4.4s
+; CHECK-NEXT: add v18.4s, v2.4s, v4.4s
+; CHECK-NEXT: add v20.4s, v3.4s, v4.4s
+; CHECK-NEXT: ushr v16.4s, v19.4s, #16
; CHECK-NEXT: and v5.16b, v5.16b, v1.16b
-; CHECK-NEXT: and v16.16b, v16.16b, v1.16b
-; CHECK-NEXT: and v1.16b, v17.16b, v1.16b
+; CHECK-NEXT: add v4.4s, v19.4s, v4.4s
+; CHECK-NEXT: and v6.16b, v6.16b, v1.16b
+; CHECK-NEXT: and v7.16b, v7.16b, v1.16b
+; CHECK-NEXT: and v1.16b, v16.16b, v1.16b
+; CHECK-NEXT: add v5.4s, v5.4s, v17.4s
+; CHECK-NEXT: fcmeq v16.4s, v0.4s, v0.4s
+; CHECK-NEXT: add v6.4s, v6.4s, v18.4s
; CHECK-NEXT: fcmeq v17.4s, v2.4s, v2.4s
-; CHECK-NEXT: orr v2.4s, #64, lsl #16
-; CHECK-NEXT: add v7.4s, v7.4s, v19.4s
-; CHECK-NEXT: fcmeq v19.4s, v6.4s, v6.4s
-; CHECK-NEXT: add v5.4s, v5.4s, v18.4s
-; CHECK-NEXT: fcmeq v18.4s, v0.4s, v0.4s
-; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
-; CHECK-NEXT: fcmeq v3.4s, v4.4s, v4.4s
-; CHECK-NEXT: add v16.4s, v16.4s, v20.4s
+; CHECK-NEXT: fcmeq v18.4s, v3.4s, v3.4s
; CHECK-NEXT: orr v0.4s, #64, lsl #16
-; CHECK-NEXT: orr v6.4s, #64, lsl #16
-; CHECK-NEXT: orr v4.4s, #64, lsl #16
-; CHECK-NEXT: bit v2.16b, v5.16b, v17.16b
-; CHECK-NEXT: mov v5.16b, v19.16b
-; CHECK-NEXT: bit v0.16b, v7.16b, v18.16b
-; CHECK-NEXT: bif v1.16b, v4.16b, v3.16b
-; CHECK-NEXT: bsl v5.16b, v16.16b, v6.16b
-; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
-; CHECK-NEXT: uzp2 v1.8h, v1.8h, v5.8h
+; CHECK-NEXT: add v7.4s, v7.4s, v20.4s
+; CHECK-NEXT: orr v2.4s, #64, lsl #16
+; CHECK-NEXT: add v1.4s, v1.4s, v4.4s
+; CHECK-NEXT: fcmeq v4.4s, v19.4s, v19.4s
+; CHECK-NEXT: orr v3.4s, #64, lsl #16
+; CHECK-NEXT: orr v19.4s, #64, lsl #16
+; CHECK-NEXT: bit v0.16b, v5.16b, v16.16b
+; CHECK-NEXT: bit v2.16b, v6.16b, v17.16b
+; CHECK-NEXT: bit v3.16b, v7.16b, v18.16b
+; CHECK-NEXT: bif v1.16b, v19.16b, v4.16b
+; CHECK-NEXT: uzp2 v0.8h, v2.8h, v0.8h
+; CHECK-NEXT: uzp2 v1.8h, v1.8h, v3.8h
; CHECK-NEXT: ret
entry:
%c = sitofp <16 x i64> %a to <16 x bfloat>
@@ -574,55 +655,82 @@ entry:
define <16 x bfloat> @utofp_v16i64_v16bf16(<16 x i64> %a) {
; CHECK-LABEL: utofp_v16i64_v16bf16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: ucvtf v2.2d, v2.2d
-; CHECK-NEXT: ucvtf v6.2d, v6.2d
-; CHECK-NEXT: ucvtf v4.2d, v4.2d
-; CHECK-NEXT: ucvtf v1.2d, v1.2d
-; CHECK-NEXT: ucvtf v3.2d, v3.2d
-; CHECK-NEXT: ucvtf v7.2d, v7.2d
-; CHECK-NEXT: ucvtf v5.2d, v5.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn v2.2s, v2.2d
-; CHECK-NEXT: fcvtn v6.2s, v6.2d
-; CHECK-NEXT: fcvtn v4.2s, v4.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT: fcvtn2 v2.4s, v3.2d
-; CHECK-NEXT: fcvtn2 v6.4s, v7.2d
-; CHECK-NEXT: fcvtn2 v4.4s, v5.2d
+; CHECK-NEXT: mov x8, v2.d[1]
+; CHECK-NEXT: fmov x11, d0
+; CHECK-NEXT: mov x9, v0.d[1]
+; CHECK-NEXT: fmov x10, d2
+; CHECK-NEXT: mov x12, v6.d[1]
+; CHECK-NEXT: ucvtf s2, x11
+; CHECK-NEXT: fmov x11, d3
+; CHECK-NEXT: ucvtf s16, x8
+; CHECK-NEXT: fmov x8, d6
+; CHECK-NEXT: ucvtf s0, x10
+; CHECK-NEXT: mov x10, v4.d[1]
+; CHECK-NEXT: ucvtf s17, x9
+; CHECK-NEXT: mov x9, v3.d[1]
+; CHECK-NEXT: ucvtf s6, x12
+; CHECK-NEXT: fmov x12, d4
+; CHECK-NEXT: ucvtf s4, x11
+; CHECK-NEXT: ucvtf s3, x8
+; CHECK-NEXT: fmov x11, d7
+; CHECK-NEXT: mov x8, v1.d[1]
+; CHECK-NEXT: mov v0.s[1], v16.s[0]
+; CHECK-NEXT: ucvtf s18, x10
+; CHECK-NEXT: ucvtf s19, x12
+; CHECK-NEXT: fmov x10, d1
+; CHECK-NEXT: mov v2.s[1], v17.s[0]
+; CHECK-NEXT: mov x12, v5.d[1]
+; CHECK-NEXT: mov v3.s[1], v6.s[0]
+; CHECK-NEXT: ucvtf s6, x11
+; CHECK-NEXT: fmov x11, d5
+; CHECK-NEXT: ucvtf s1, x10
+; CHECK-NEXT: mov x10, v7.d[1]
+; CHECK-NEXT: ucvtf s7, x9
+; CHECK-NEXT: mov v19.s[1], v18.s[0]
+; CHECK-NEXT: ucvtf s16, x8
+; CHECK-NEXT: mov v0.s[2], v4.s[0]
+; CHECK-NEXT: ucvtf s5, x11
+; CHECK-NEXT: mov v3.s[2], v6.s[0]
+; CHECK-NEXT: ucvtf s4, x10
+; CHECK-NEXT: mov v2.s[2], v1.s[0]
+; CHECK-NEXT: ucvtf s1, x12
+; CHECK-NEXT: mov v0.s[3], v7.s[0]
+; CHECK-NEXT: mov v19.s[2], v5.s[0]
+; CHECK-NEXT: mov v2.s[3], v16.s[0]
+; CHECK-NEXT: mov v3.s[3], v4.s[0]
+; CHECK-NEXT: movi v4.4s, #127, msl #8
+; CHECK-NEXT: ushr v5.4s, v0.4s, #16
+; CHECK-NEXT: mov v19.s[3], v1.s[0]
; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: movi v3.4s, #127, msl #8
-; CHECK-NEXT: ushr v7.4s, v0.4s, #16
-; CHECK-NEXT: ushr v5.4s, v2.4s, #16
-; CHECK-NEXT: ushr v16.4s, v6.4s, #16
-; CHECK-NEXT: ushr v17.4s, v4.4s, #16
-; CHECK-NEXT: add v19.4s, v0.4s, v3.4s
-; CHECK-NEXT: add v18.4s, v2.4s, v3.4s
-; CHECK-NEXT: add v20.4s, v6.4s, v3.4s
-; CHECK-NEXT: add v3.4s, v4.4s, v3.4s
-; CHECK-NEXT: and v7.16b, v7.16b, v1.16b
+; CHECK-NEXT: ushr v6.4s, v2.4s, #16
+; CHECK-NEXT: ushr v7.4s, v3.4s, #16
+; CHECK-NEXT: add v17.4s, v0.4s, v4.4s
+; CHECK-NEXT: add v18.4s, v2.4s, v4.4s
+; CHECK-NEXT: add v20.4s, v3.4s, v4.4s
+; CHECK-NEXT: ushr v16.4s, v19.4s, #16
; CHECK-NEXT: and v5.16b, v5.16b, v1.16b
-; CHECK-NEXT: and v16.16b, v16.16b, v1.16b
-; CHECK-NEXT: and v1.16b, v17.16b, v1.16b
+; CHECK-NEXT: add v4.4s, v19.4s, v4.4s
+; CHECK-NEXT: and v6.16b, v6.16b, v1.16b
+; CHECK-NEXT: and v7.16b, v7.16b, v1.16b
+; CHECK-NEXT: and v1.16b, v16.16b, v1.16b
+; CHECK-NEXT: add v5.4s, v5.4s, v17.4s
+; CHECK-NEXT: fcmeq v16.4s, v0.4s, v0.4s
+; CHECK-NEXT: add v6.4s, v6.4s, v18.4s
; CHECK-NEXT: fcmeq v17.4s, v2.4s, v2.4s
-; CHECK-NEXT: orr v2.4s, #64, lsl #16
-; CHECK-NEXT: add v7.4s, v7.4s, v19.4s
-; CHECK-NEXT: fcmeq v19.4s, v6.4s, v6.4s
-; CHECK-NEXT: add v5.4s, v5.4s, v18.4s
-; CHECK-NEXT: fcmeq v18.4s, v0.4s, v0.4s
-; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
-; CHECK-NEXT: fcmeq v3.4s, v4.4s, v4.4s
-; CHECK-NEXT: add v16.4s, v16.4s, v20.4s
+; CHECK-NEXT: fcmeq v18.4s, v3.4s, v3.4s
; CHECK-NEXT: orr v0.4s, #64, lsl #16
-; CHECK-NEXT: orr v6.4s, #64, lsl #16
-; CHECK-NEXT: orr v4.4s, #64, lsl #16
-; CHECK-NEXT: bit v2.16b, v5.16b, v17.16b
-; CHECK-NEXT: mov v5.16b, v19.16b
-; CHECK-NEXT: bit v0.16b, v7.16b, v18.16b
-; CHECK-NEXT: bif v1.16b, v4.16b, v3.16b
-; CHECK-NEXT: bsl v5.16b, v16.16b, v6.16b
-; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
-; CHECK-NEXT: uzp2 v1.8h, v1.8h, v5.8h
+; CHECK-NEXT: add v7.4s, v7.4s, v20.4s
+; CHECK-NEXT: orr v2.4s, #64, lsl #16
+; CHECK-NEXT: add v1.4s, v1.4s, v4.4s
+; CHECK-NEXT: fcmeq v4.4s, v19.4s, v19.4s
+; CHECK-NEXT: orr v3.4s, #64, lsl #16
+; CHECK-NEXT: orr v19.4s, #64, lsl #16
+; CHECK-NEXT: bit v0.16b, v5.16b, v16.16b
+; CHECK-NEXT: bit v2.16b, v6.16b, v17.16b
+; CHECK-NEXT: bit v3.16b, v7.16b, v18.16b
+; CHECK-NEXT: bif v1.16b, v19.16b, v4.16b
+; CHECK-NEXT: uzp2 v0.8h, v2.8h, v0.8h
+; CHECK-NEXT: uzp2 v1.8h, v1.8h, v3.8h
; CHECK-NEXT: ret
entry:
%c = uitofp <16 x i64> %a to <16 x bfloat>
@@ -632,107 +740,162 @@ entry:
define <32 x bfloat> @stofp_v32i64_v32bf16(<32 x i64> %a) {
; CHECK-LABEL: stofp_v32i64_v32bf16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: scvtf v17.2d, v2.2d
-; CHECK-NEXT: scvtf v18.2d, v0.2d
-; CHECK-NEXT: scvtf v19.2d, v3.2d
-; CHECK-NEXT: scvtf v3.2d, v6.2d
-; CHECK-NEXT: ldp q21, q20, [sp, #32]
-; CHECK-NEXT: scvtf v4.2d, v4.2d
-; CHECK-NEXT: scvtf v6.2d, v7.2d
-; CHECK-NEXT: scvtf v5.2d, v5.2d
-; CHECK-NEXT: ldp q24, q23, [sp, #64]
-; CHECK-NEXT: movi v16.4s, #1
-; CHECK-NEXT: fcvtn v0.2s, v17.2d
-; CHECK-NEXT: scvtf v17.2d, v1.2d
-; CHECK-NEXT: fcvtn v1.2s, v18.2d
-; CHECK-NEXT: fcvtn v3.2s, v3.2d
-; CHECK-NEXT: ldp q18, q7, [sp]
-; CHECK-NEXT: scvtf v21.2d, v21.2d
-; CHECK-NEXT: fcvtn v4.2s, v4.2d
-; CHECK-NEXT: movi v2.4s, #127, msl #8
-; CHECK-NEXT: scvtf v20.2d, v20.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v19.2d
-; CHECK-NEXT: ldp q22, q19, [sp, #96]
-; CHECK-NEXT: fcvtn2 v1.4s, v17.2d
-; CHECK-NEXT: fcvtn2 v3.4s, v6.2d
-; CHECK-NEXT: scvtf v18.2d, v18.2d
-; CHECK-NEXT: scvtf v17.2d, v24.2d
-; CHECK-NEXT: fcvtn v6.2s, v21.2d
-; CHECK-NEXT: fcvtn2 v4.4s, v5.2d
-; CHECK-NEXT: scvtf v22.2d, v22.2d
-; CHECK-NEXT: scvtf v21.2d, v23.2d
-; CHECK-NEXT: scvtf v7.2d, v7.2d
-; CHECK-NEXT: ushr v24.4s, v0.4s, #16
-; CHECK-NEXT: add v5.4s, v0.4s, v2.4s
-; CHECK-NEXT: scvtf v19.2d, v19.2d
-; CHECK-NEXT: ushr v23.4s, v1.4s, #16
-; CHECK-NEXT: ushr v25.4s, v3.4s, #16
-; CHECK-NEXT: fcvtn v18.2s, v18.2d
-; CHECK-NEXT: fcvtn2 v6.4s, v20.2d
-; CHECK-NEXT: add v26.4s, v1.4s, v2.4s
-; CHECK-NEXT: fcvtn v17.2s, v17.2d
-; CHECK-NEXT: and v24.16b, v24.16b, v16.16b
-; CHECK-NEXT: fcvtn v22.2s, v22.2d
-; CHECK-NEXT: fcmeq v20.4s, v0.4s, v0.4s
-; CHECK-NEXT: and v23.16b, v23.16b, v16.16b
-; CHECK-NEXT: orr v0.4s, #64, lsl #16
-; CHECK-NEXT: fcmeq v27.4s, v3.4s, v3.4s
-; CHECK-NEXT: fcvtn2 v18.4s, v7.2d
-; CHECK-NEXT: add v7.4s, v3.4s, v2.4s
-; CHECK-NEXT: orr v3.4s, #64, lsl #16
-; CHECK-NEXT: add v5.4s, v24.4s, v5.4s
-; CHECK-NEXT: and v24.16b, v25.16b, v16.16b
-; CHECK-NEXT: ushr v25.4s, v4.4s, #16
-; CHECK-NEXT: fcvtn2 v22.4s, v19.2d
-; CHECK-NEXT: add v19.4s, v23.4s, v26.4s
-; CHECK-NEXT: ushr v26.4s, v6.4s, #16
-; CHECK-NEXT: fcvtn2 v17.4s, v21.2d
-; CHECK-NEXT: fcmeq v21.4s, v1.4s, v1.4s
+; CHECK-NEXT: fmov x10, d2
+; CHECK-NEXT: mov x9, v3.d[1]
+; CHECK-NEXT: mov x8, v2.d[1]
+; CHECK-NEXT: fmov x11, d3
+; CHECK-NEXT: fmov x12, d0
+; CHECK-NEXT: movi v3.4s, #1
+; CHECK-NEXT: scvtf s2, x10
+; CHECK-NEXT: mov x10, v0.d[1]
+; CHECK-NEXT: scvtf s19, x9
+; CHECK-NEXT: fmov x9, d1
+; CHECK-NEXT: scvtf s16, x11
+; CHECK-NEXT: mov x11, v6.d[1]
+; CHECK-NEXT: scvtf s0, x12
+; CHECK-NEXT: scvtf s18, x8
+; CHECK-NEXT: mov x8, v1.d[1]
+; CHECK-NEXT: scvtf s20, x10
+; CHECK-NEXT: scvtf s17, x9
+; CHECK-NEXT: mov x9, v7.d[1]
+; CHECK-NEXT: mov x10, v4.d[1]
+; CHECK-NEXT: scvtf s21, x11
+; CHECK-NEXT: fmov x11, d6
+; CHECK-NEXT: mov v2.s[1], v18.s[0]
+; CHECK-NEXT: scvtf s25, x8
+; CHECK-NEXT: movi v6.4s, #127, msl #8
+; CHECK-NEXT: mov v0.s[1], v20.s[0]
+; CHECK-NEXT: ldp q24, q20, [sp, #32]
+; CHECK-NEXT: scvtf s22, x9
+; CHECK-NEXT: fmov x9, d4
+; CHECK-NEXT: scvtf s1, x11
+; CHECK-NEXT: scvtf s26, x10
+; CHECK-NEXT: fmov x11, d7
+; CHECK-NEXT: mov v2.s[2], v16.s[0]
+; CHECK-NEXT: ldp q18, q16, [sp]
+; CHECK-NEXT: mov x8, v24.d[1]
+; CHECK-NEXT: scvtf s4, x9
+; CHECK-NEXT: fmov x9, d5
+; CHECK-NEXT: mov v0.s[2], v17.s[0]
+; CHECK-NEXT: mov v1.s[1], v21.s[0]
+; CHECK-NEXT: scvtf s23, x11
+; CHECK-NEXT: mov x11, v5.d[1]
+; CHECK-NEXT: mov v2.s[3], v19.s[0]
+; CHECK-NEXT: scvtf s21, x8
+; CHECK-NEXT: mov x8, v20.d[1]
+; CHECK-NEXT: scvtf s17, x9
+; CHECK-NEXT: fmov x9, d24
+; CHECK-NEXT: mov v4.s[1], v26.s[0]
+; CHECK-NEXT: mov v0.s[3], v25.s[0]
+; CHECK-NEXT: ldp q26, q24, [sp, #96]
+; CHECK-NEXT: mov v1.s[2], v23.s[0]
+; CHECK-NEXT: ldp q25, q23, [sp, #64]
+; CHECK-NEXT: scvtf s7, x11
+; CHECK-NEXT: scvtf s27, x8
+; CHECK-NEXT: fmov x8, d18
+; CHECK-NEXT: scvtf s5, x9
+; CHECK-NEXT: mov x10, v26.d[1]
+; CHECK-NEXT: mov x9, v18.d[1]
+; CHECK-NEXT: fmov x11, d20
+; CHECK-NEXT: mov v4.s[2], v17.s[0]
+; CHECK-NEXT: mov v1.s[3], v22.s[0]
+; CHECK-NEXT: ushr v19.4s, v2.4s, #16
+; CHECK-NEXT: scvtf s17, x8
+; CHECK-NEXT: fmov x8, d26
+; CHECK-NEXT: add v26.4s, v2.4s, v6.4s
+; CHECK-NEXT: scvtf s22, x11
+; CHECK-NEXT: mov x11, v25.d[1]
+; CHECK-NEXT: mov v5.s[1], v21.s[0]
+; CHECK-NEXT: scvtf s28, x10
+; CHECK-NEXT: fmov x10, d16
+; CHECK-NEXT: scvtf s21, x9
+; CHECK-NEXT: fmov x9, d25
+; CHECK-NEXT: scvtf s18, x8
+; CHECK-NEXT: mov x8, v16.d[1]
+; CHECK-NEXT: mov v4.s[3], v7.s[0]
+; CHECK-NEXT: and v19.16b, v19.16b, v3.16b
+; CHECK-NEXT: scvtf s16, x10
+; CHECK-NEXT: fmov x10, d24
+; CHECK-NEXT: scvtf s25, x11
+; CHECK-NEXT: scvtf s20, x9
+; CHECK-NEXT: mov x9, v24.d[1]
+; CHECK-NEXT: mov v17.s[1], v21.s[0]
+; CHECK-NEXT: fmov x11, d23
+; CHECK-NEXT: mov v18.s[1], v28.s[0]
+; CHECK-NEXT: scvtf s24, x8
+; CHECK-NEXT: scvtf s21, x10
+; CHECK-NEXT: mov x10, v23.d[1]
+; CHECK-NEXT: mov v5.s[2], v22.s[0]
+; CHECK-NEXT: ushr v22.4s, v1.4s, #16
+; CHECK-NEXT: ushr v28.4s, v0.4s, #16
+; CHECK-NEXT: scvtf s23, x11
+; CHECK-NEXT: mov v20.s[1], v25.s[0]
+; CHECK-NEXT: scvtf s25, x9
+; CHECK-NEXT: mov v17.s[2], v16.s[0]
+; CHECK-NEXT: add v16.4s, v19.4s, v26.4s
+; CHECK-NEXT: ushr v26.4s, v4.4s, #16
+; CHECK-NEXT: mov v18.s[2], v21.s[0]
+; CHECK-NEXT: scvtf s7, x10
+; CHECK-NEXT: and v22.16b, v22.16b, v3.16b
+; CHECK-NEXT: mov v5.s[3], v27.s[0]
+; CHECK-NEXT: and v21.16b, v28.16b, v3.16b
+; CHECK-NEXT: fcmeq v19.4s, v2.4s, v2.4s
+; CHECK-NEXT: mov v20.s[2], v23.s[0]
+; CHECK-NEXT: add v23.4s, v0.4s, v6.4s
+; CHECK-NEXT: orr v2.4s, #64, lsl #16
+; CHECK-NEXT: mov v17.s[3], v24.s[0]
+; CHECK-NEXT: add v24.4s, v1.4s, v6.4s
+; CHECK-NEXT: fcmeq v27.4s, v1.4s, v1.4s
+; CHECK-NEXT: mov v18.s[3], v25.s[0]
+; CHECK-NEXT: add v25.4s, v4.4s, v6.4s
; CHECK-NEXT: orr v1.4s, #64, lsl #16
-; CHECK-NEXT: and v23.16b, v25.16b, v16.16b
-; CHECK-NEXT: add v25.4s, v4.4s, v2.4s
-; CHECK-NEXT: add v7.4s, v24.4s, v7.4s
-; CHECK-NEXT: ushr v24.4s, v18.4s, #16
-; CHECK-NEXT: add v30.4s, v18.4s, v2.4s
-; CHECK-NEXT: bit v0.16b, v5.16b, v20.16b
-; CHECK-NEXT: ushr v28.4s, v22.4s, #16
-; CHECK-NEXT: add v31.4s, v22.4s, v2.4s
+; CHECK-NEXT: bit v2.16b, v16.16b, v19.16b
+; CHECK-NEXT: mov v20.s[3], v7.s[0]
+; CHECK-NEXT: add v22.4s, v22.4s, v24.4s
+; CHECK-NEXT: add v7.4s, v21.4s, v23.4s
+; CHECK-NEXT: ushr v24.4s, v17.4s, #16
+; CHECK-NEXT: and v23.16b, v26.16b, v3.16b
+; CHECK-NEXT: ushr v26.4s, v5.4s, #16
+; CHECK-NEXT: ushr v28.4s, v18.4s, #16
+; CHECK-NEXT: add v30.4s, v17.4s, v6.4s
+; CHECK-NEXT: add v31.4s, v18.4s, v6.4s
+; CHECK-NEXT: fcmeq v21.4s, v0.4s, v0.4s
+; CHECK-NEXT: orr v0.4s, #64, lsl #16
+; CHECK-NEXT: bit v1.16b, v22.16b, v27.16b
+; CHECK-NEXT: ushr v29.4s, v20.4s, #16
+; CHECK-NEXT: and v24.16b, v24.16b, v3.16b
; CHECK-NEXT: add v23.4s, v23.4s, v25.4s
-; CHECK-NEXT: and v25.16b, v26.16b, v16.16b
-; CHECK-NEXT: add v26.4s, v6.4s, v2.4s
-; CHECK-NEXT: ushr v29.4s, v17.4s, #16
-; CHECK-NEXT: and v24.16b, v24.16b, v16.16b
-; CHECK-NEXT: add v2.4s, v17.4s, v2.4s
-; CHECK-NEXT: and v28.16b, v28.16b, v16.16b
-; CHECK-NEXT: bit v3.16b, v7.16b, v27.16b
-; CHECK-NEXT: bit v1.16b, v19.16b, v21.16b
-; CHECK-NEXT: add v25.4s, v25.4s, v26.4s
-; CHECK-NEXT: fcmeq v26.4s, v6.4s, v6.4s
-; CHECK-NEXT: orr v6.4s, #64, lsl #16
-; CHECK-NEXT: and v16.16b, v29.16b, v16.16b
+; CHECK-NEXT: and v28.16b, v28.16b, v3.16b
+; CHECK-NEXT: and v25.16b, v26.16b, v3.16b
+; CHECK-NEXT: add v26.4s, v5.4s, v6.4s
+; CHECK-NEXT: add v6.4s, v20.4s, v6.4s
+; CHECK-NEXT: and v3.16b, v29.16b, v3.16b
; CHECK-NEXT: add v24.4s, v24.4s, v30.4s
-; CHECK-NEXT: fcmeq v30.4s, v18.4s, v18.4s
+; CHECK-NEXT: fcmeq v30.4s, v17.4s, v17.4s
; CHECK-NEXT: add v28.4s, v28.4s, v31.4s
-; CHECK-NEXT: fcmeq v31.4s, v22.4s, v22.4s
+; CHECK-NEXT: fcmeq v31.4s, v18.4s, v18.4s
; CHECK-NEXT: fcmeq v29.4s, v4.4s, v4.4s
+; CHECK-NEXT: add v25.4s, v25.4s, v26.4s
+; CHECK-NEXT: fcmeq v26.4s, v5.4s, v5.4s
; CHECK-NEXT: orr v4.4s, #64, lsl #16
-; CHECK-NEXT: orr v18.4s, #64, lsl #16
-; CHECK-NEXT: orr v22.4s, #64, lsl #16
-; CHECK-NEXT: mov v5.16b, v26.16b
-; CHECK-NEXT: add v2.4s, v16.4s, v2.4s
-; CHECK-NEXT: fcmeq v16.4s, v17.4s, v17.4s
+; CHECK-NEXT: add v3.4s, v3.4s, v6.4s
+; CHECK-NEXT: fcmeq v6.4s, v20.4s, v20.4s
+; CHECK-NEXT: orr v5.4s, #64, lsl #16
; CHECK-NEXT: orr v17.4s, #64, lsl #16
-; CHECK-NEXT: uzp2 v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: mov v7.16b, v31.16b
+; CHECK-NEXT: orr v18.4s, #64, lsl #16
+; CHECK-NEXT: orr v20.4s, #64, lsl #16
+; CHECK-NEXT: bit v0.16b, v7.16b, v21.16b
+; CHECK-NEXT: mov v7.16b, v30.16b
+; CHECK-NEXT: mov v16.16b, v31.16b
; CHECK-NEXT: bit v4.16b, v23.16b, v29.16b
-; CHECK-NEXT: bsl v5.16b, v25.16b, v6.16b
-; CHECK-NEXT: mov v6.16b, v30.16b
-; CHECK-NEXT: bsl v16.16b, v2.16b, v17.16b
-; CHECK-NEXT: bsl v7.16b, v28.16b, v22.16b
-; CHECK-NEXT: bsl v6.16b, v24.16b, v18.16b
-; CHECK-NEXT: uzp2 v1.8h, v4.8h, v3.8h
-; CHECK-NEXT: uzp2 v3.8h, v16.8h, v7.8h
-; CHECK-NEXT: uzp2 v2.8h, v6.8h, v5.8h
+; CHECK-NEXT: bit v5.16b, v25.16b, v26.16b
+; CHECK-NEXT: bif v3.16b, v20.16b, v6.16b
+; CHECK-NEXT: bsl v7.16b, v24.16b, v17.16b
+; CHECK-NEXT: bsl v16.16b, v28.16b, v18.16b
+; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: uzp2 v1.8h, v4.8h, v1.8h
+; CHECK-NEXT: uzp2 v2.8h, v7.8h, v5.8h
+; CHECK-NEXT: uzp2 v3.8h, v3.8h, v16.8h
; CHECK-NEXT: ret
entry:
%c = sitofp <32 x i64> %a to <32 x bfloat>
@@ -742,107 +905,162 @@ entry:
define <32 x bfloat> @utofp_v32i64_v32bf16(<32 x i64> %a) {
; CHECK-LABEL: utofp_v32i64_v32bf16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ucvtf v17.2d, v2.2d
-; CHECK-NEXT: ucvtf v18.2d, v0.2d
-; CHECK-NEXT: ucvtf v19.2d, v3.2d
-; CHECK-NEXT: ucvtf v3.2d, v6.2d
-; CHECK-NEXT: ldp q21, q20, [sp, #32]
-; CHECK-NEXT: ucvtf v4.2d, v4.2d
-; CHECK-NEXT: ucvtf v6.2d, v7.2d
-; CHECK-NEXT: ucvtf v5.2d, v5.2d
-; CHECK-NEXT: ldp q24, q23, [sp, #64]
-; CHECK-NEXT: movi v16.4s, #1
-; CHECK-NEXT: fcvtn v0.2s, v17.2d
-; CHECK-NEXT: ucvtf v17.2d, v1.2d
-; CHECK-NEXT: fcvtn v1.2s, v18.2d
-; CHECK-NEXT: fcvtn v3.2s, v3.2d
-; CHECK-NEXT: ldp q18, q7, [sp]
-; CHECK-NEXT: ucvtf v21.2d, v21.2d
-; CHECK-NEXT: fcvtn v4.2s, v4.2d
-; CHECK-NEXT: movi v2.4s, #127, msl #8
-; CHECK-NEXT: ucvtf v20.2d, v20.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v19.2d
-; CHECK-NEXT: ldp q22, q19, [sp, #96]
-; CHECK-NEXT: fcvtn2 v1.4s, v17.2d
-; CHECK-NEXT: fcvtn2 v3.4s, v6.2d
-; CHECK-NEXT: ucvtf v18.2d, v18.2d
-; CHECK-NEXT: ucvtf v17.2d, v24.2d
-; CHECK-NEXT: fcvtn v6.2s, v21.2d
-; CHECK-NEXT: fcvtn2 v4.4s, v5.2d
-; CHECK-NEXT: ucvtf v22.2d, v22.2d
-; CHECK-NEXT: ucvtf v21.2d, v23.2d
-; CHECK-NEXT: ucvtf v7.2d, v7.2d
-; CHECK-NEXT: ushr v24.4s, v0.4s, #16
-; CHECK-NEXT: add v5.4s, v0.4s, v2.4s
-; CHECK-NEXT: ucvtf v19.2d, v19.2d
-; CHECK-NEXT: ushr v23.4s, v1.4s, #16
-; CHECK-NEXT: ushr v25.4s, v3.4s, #16
-; CHECK-NEXT: fcvtn v18.2s, v18.2d
-; CHECK-NEXT: fcvtn2 v6.4s, v20.2d
-; CHECK-NEXT: add v26.4s, v1.4s, v2.4s
-; CHECK-NEXT: fcvtn v17.2s, v17.2d
-; CHECK-NEXT: and v24.16b, v24.16b, v16.16b
-; CHECK-NEXT: fcvtn v22.2s, v22.2d
-; CHECK-NEXT: fcmeq v20.4s, v0.4s, v0.4s
-; CHECK-NEXT: and v23.16b, v23.16b, v16.16b
-; CHECK-NEXT: orr v0.4s, #64, lsl #16
-; CHECK-NEXT: fcmeq v27.4s, v3.4s, v3.4s
-; CHECK-NEXT: fcvtn2 v18.4s, v7.2d
-; CHECK-NEXT: add v7.4s, v3.4s, v2.4s
-; CHECK-NEXT: orr v3.4s, #64, lsl #16
-; CHECK-NEXT: add v5.4s, v24.4s, v5.4s
-; CHECK-NEXT: and v24.16b, v25.16b, v16.16b
-; CHECK-NEXT: ushr v25.4s, v4.4s, #16
-; CHECK-NEXT: fcvtn2 v22.4s, v19.2d
-; CHECK-NEXT: add v19.4s, v23.4s, v26.4s
-; CHECK-NEXT: ushr v26.4s, v6.4s, #16
-; CHECK-NEXT: fcvtn2 v17.4s, v21.2d
-; CHECK-NEXT: fcmeq v21.4s, v1.4s, v1.4s
+; CHECK-NEXT: fmov x10, d2
+; CHECK-NEXT: mov x9, v3.d[1]
+; CHECK-NEXT: mov x8, v2.d[1]
+; CHECK-NEXT: fmov x11, d3
+; CHECK-NEXT: fmov x12, d0
+; CHECK-NEXT: movi v3.4s, #1
+; CHECK-NEXT: ucvtf s2, x10
+; CHECK-NEXT: mov x10, v0.d[1]
+; CHECK-NEXT: ucvtf s19, x9
+; CHECK-NEXT: fmov x9, d1
+; CHECK-NEXT: ucvtf s16, x11
+; CHECK-NEXT: mov x11, v6.d[1]
+; CHECK-NEXT: ucvtf s0, x12
+; CHECK-NEXT: ucvtf s18, x8
+; CHECK-NEXT: mov x8, v1.d[1]
+; CHECK-NEXT: ucvtf s20, x10
+; CHECK-NEXT: ucvtf s17, x9
+; CHECK-NEXT: mov x9, v7.d[1]
+; CHECK-NEXT: mov x10, v4.d[1]
+; CHECK-NEXT: ucvtf s21, x11
+; CHECK-NEXT: fmov x11, d6
+; CHECK-NEXT: mov v2.s[1], v18.s[0]
+; CHECK-NEXT: ucvtf s25, x8
+; CHECK-NEXT: movi v6.4s, #127, msl #8
+; CHECK-NEXT: mov v0.s[1], v20.s[0]
+; CHECK-NEXT: ldp q24, q20, [sp, #32]
+; CHECK-NEXT: ucvtf s22, x9
+; CHECK-NEXT: fmov x9, d4
+; CHECK-NEXT: ucvtf s1, x11
+; CHECK-NEXT: ucvtf s26, x10
+; CHECK-NEXT: fmov x11, d7
+; CHECK-NEXT: mov v2.s[2], v16.s[0]
+; CHECK-NEXT: ldp q18, q16, [sp]
+; CHECK-NEXT: mov x8, v24.d[1]
+; CHECK-NEXT: ucvtf s4, x9
+; CHECK-NEXT: fmov x9, d5
+; CHECK-NEXT: mov v0.s[2], v17.s[0]
+; CHECK-NEXT: mov v1.s[1], v21.s[0]
+; CHECK-NEXT: ucvtf s23, x11
+; CHECK-NEXT: mov x11, v5.d[1]
+; CHECK-NEXT: mov v2.s[3], v19.s[0]
+; CHECK-NEXT: ucvtf s21, x8
+; CHECK-NEXT: mov x8, v20.d[1]
+; CHECK-NEXT: ucvtf s17, x9
+; CHECK-NEXT: fmov x9, d24
+; CHECK-NEXT: mov v4.s[1], v26.s[0]
+; CHECK-NEXT: mov v0.s[3], v25.s[0]
+; CHECK-NEXT: ldp q26, q24, [sp, #96]
+; CHECK-NEXT: mov v1.s[2], v23.s[0]
+; CHECK-NEXT: ldp q25, q23, [sp, #64]
+; CHECK-NEXT: ucvtf s7, x11
+; CHECK-NEXT: ucvtf s27, x8
+; CHECK-NEXT: fmov x8, d18
+; CHECK-NEXT: ucvtf s5, x9
+; CHECK-NEXT: mov x10, v26.d[1]
+; CHECK-NEXT: mov x9, v18.d[1]
+; CHECK-NEXT: fmov x11, d20
+; CHECK-NEXT: mov v4.s[2], v17.s[0]
+; CHECK-NEXT: mov v1.s[3], v22.s[0]
+; CHECK-NEXT: ushr v19.4s, v2.4s, #16
+; CHECK-NEXT: ucvtf s17, x8
+; CHECK-NEXT: fmov x8, d26
+; CHECK-NEXT: add v26.4s, v2.4s, v6.4s
+; CHECK-NEXT: ucvtf s22, x11
+; CHECK-NEXT: mov x11, v25.d[1]
+; CHECK-NEXT: mov v5.s[1], v21.s[0]
+; CHECK-NEXT: ucvtf s28, x10
+; CHECK-NEXT: fmov x10, d16
+; CHECK-NEXT: ucvtf s21, x9
+; CHECK-NEXT: fmov x9, d25
+; CHECK-NEXT: ucvtf s18, x8
+; CHECK-NEXT: mov x8, v16.d[1]
+; CHECK-NEXT: mov v4.s[3], v7.s[0]
+; CHECK-NEXT: and v19.16b, v19.16b, v3.16b
+; CHECK-NEXT: ucvtf s16, x10
+; CHECK-NEXT: fmov x10, d24
+; CHECK-NEXT: ucvtf s25, x11
+; CHECK-NEXT: ucvtf s20, x9
+; CHECK-NEXT: mov x9, v24.d[1]
+; CHECK-NEXT: mov v17.s[1], v21.s[0]
+; CHECK-NEXT: fmov x11, d23
+; CHECK-NEXT: mov v18.s[1], v28.s[0]
+; CHECK-NEXT: ucvtf s24, x8
+; CHECK-NEXT: ucvtf s21, x10
+; CHECK-NEXT: mov x10, v23.d[1]
+; CHECK-NEXT: mov v5.s[2], v22.s[0]
+; CHECK-NEXT: ushr v22.4s, v1.4s, #16
+; CHECK-NEXT: ushr v28.4s, v0.4s, #16
+; CHECK-NEXT: ucvtf s23, x11
+; CHECK-NEXT: mov v20.s[1], v25.s[0]
+; CHECK-NEXT: ucvtf s25, x9
+; CHECK-NEXT: mov v17.s[2], v16.s[0]
+; CHECK-NEXT: add v16.4s, v19.4s, v26.4s
+; CHECK-NEXT: ushr v26.4s, v4.4s, #16
+; CHECK-NEXT: mov v18.s[2], v21.s[0]
+; CHECK-NEXT: ucvtf s7, x10
+; CHECK-NEXT: and v22.16b, v22.16b, v3.16b
+; CHECK-NEXT: mov v5.s[3], v27.s[0]
+; CHECK-NEXT: and v21.16b, v28.16b, v3.16b
+; CHECK-NEXT: fcmeq v19.4s, v2.4s, v2.4s
+; CHECK-NEXT: mov v20.s[2], v23.s[0]
+; CHECK-NEXT: add v23.4s, v0.4s, v6.4s
+; CHECK-NEXT: orr v2.4s, #64, lsl #16
+; CHECK-NEXT: mov v17.s[3], v24.s[0]
+; CHECK-NEXT: add v24.4s, v1.4s, v6.4s
+; CHECK-NEXT: fcmeq v27.4s, v1.4s, v1.4s
+; CHECK-NEXT: mov v18.s[3], v25.s[0]
+; CHECK-NEXT: add v25.4s, v4.4s, v6.4s
; CHECK-NEXT: orr v1.4s, #64, lsl #16
-; CHECK-NEXT: and v23.16b, v25.16b, v16.16b
-; CHECK-NEXT: add v25.4s, v4.4s, v2.4s
-; CHECK-NEXT: add v7.4s, v24.4s, v7.4s
-; CHECK-NEXT: ushr v24.4s, v18.4s, #16
-; CHECK-NEXT: add v30.4s, v18.4s, v2.4s
-; CHECK-NEXT: bit v0.16b, v5.16b, v20.16b
-; CHECK-NEXT: ushr v28.4s, v22.4s, #16
-; CHECK-NEXT: add v31.4s, v22.4s, v2.4s
+; CHECK-NEXT: bit v2.16b, v16.16b, v19.16b
+; CHECK-NEXT: mov v20.s[3], v7.s[0]
+; CHECK-NEXT: add v22.4s, v22.4s, v24.4s
+; CHECK-NEXT: add v7.4s, v21.4s, v23.4s
+; CHECK-NEXT: ushr v24.4s, v17.4s, #16
+; CHECK-NEXT: and v23.16b, v26.16b, v3.16b
+; CHECK-NEXT: ushr v26.4s, v5.4s, #16
+; CHECK-NEXT: ushr v28.4s, v18.4s, #16
+; CHECK-NEXT: add v30.4s, v17.4s, v6.4s
+; CHECK-NEXT: add v31.4s, v18.4s, v6.4s
+; CHECK-NEXT: fcmeq v21.4s, v0.4s, v0.4s
+; CHECK-NEXT: orr v0.4s, #64, lsl #16
+; CHECK-NEXT: bit v1.16b, v22.16b, v27.16b
+; CHECK-NEXT: ushr v29.4s, v20.4s, #16
+; CHECK-NEXT: and v24.16b, v24.16b, v3.16b
; CHECK-NEXT: add v23.4s, v23.4s, v25.4s
-; CHECK-NEXT: and v25.16b, v26.16b, v16.16b
-; CHECK-NEXT: add v26.4s, v6.4s, v2.4s
-; CHECK-NEXT: ushr v29.4s, v17.4s, #16
-; CHECK-NEXT: and v24.16b, v24.16b, v16.16b
-; CHECK-NEXT: add v2.4s, v17.4s, v2.4s
-; CHECK-NEXT: and v28.16b, v28.16b, v16.16b
-; CHECK-NEXT: bit v3.16b, v7.16b, v27.16b
-; CHECK-NEXT: bit v1.16b, v19.16b, v21.16b
-; CHECK-NEXT: add v25.4s, v25.4s, v26.4s
-; CHECK-NEXT: fcmeq v26.4s, v6.4s, v6.4s
-; CHECK-NEXT: orr v6.4s, #64, lsl #16
-; CHECK-NEXT: and v16.16b, v29.16b, v16.16b
+; CHECK-NEXT: and v28.16b, v28.16b, v3.16b
+; CHECK-NEXT: and v25.16b, v26.16b, v3.16b
+; CHECK-NEXT: add v26.4s, v5.4s, v6.4s
+; CHECK-NEXT: add v6.4s, v20.4s, v6.4s
+; CHECK-NEXT: and v3.16b, v29.16b, v3.16b
; CHECK-NEXT: add v24.4s, v24.4s, v30.4s
-; CHECK-NEXT: fcmeq v30.4s, v18.4s, v18.4s
+; CHECK-NEXT: fcmeq v30.4s, v17.4s, v17.4s
; CHECK-NEXT: add v28.4s, v28.4s, v31.4s
-; CHECK-NEXT: fcmeq v31.4s, v22.4s, v22.4s
+; CHECK-NEXT: fcmeq v31.4s, v18.4s, v18.4s
; CHECK-NEXT: fcmeq v29.4s, v4.4s, v4.4s
+; CHECK-NEXT: add v25.4s, v25.4s, v26.4s
+; CHECK-NEXT: fcmeq v26.4s, v5.4s, v5.4s
; CHECK-NEXT: orr v4.4s, #64, lsl #16
-; CHECK-NEXT: orr v18.4s, #64, lsl #16
-; CHECK-NEXT: orr v22.4s, #64, lsl #16
-; CHECK-NEXT: mov v5.16b, v26.16b
-; CHECK-NEXT: add v2.4s, v16.4s, v2.4s
-; CHECK-NEXT: fcmeq v16.4s, v17.4s, v17.4s
+; CHECK-NEXT: add v3.4s, v3.4s, v6.4s
+; CHECK-NEXT: fcmeq v6.4s, v20.4s, v20.4s
+; CHECK-NEXT: orr v5.4s, #64, lsl #16
; CHECK-NEXT: orr v17.4s, #64, lsl #16
-; CHECK-NEXT: uzp2 v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: mov v7.16b, v31.16b
+; CHECK-NEXT: orr v18.4s, #64, lsl #16
+; CHECK-NEXT: orr v20.4s, #64, lsl #16
+; CHECK-NEXT: bit v0.16b, v7.16b, v21.16b
+; CHECK-NEXT: mov v7.16b, v30.16b
+; CHECK-NEXT: mov v16.16b, v31.16b
; CHECK-NEXT: bit v4.16b, v23.16b, v29.16b
-; CHECK-NEXT: bsl v5.16b, v25.16b, v6.16b
-; CHECK-NEXT: mov v6.16b, v30.16b
-; CHECK-NEXT: bsl v16.16b, v2.16b, v17.16b
-; CHECK-NEXT: bsl v7.16b, v28.16b, v22.16b
-; CHECK-NEXT: bsl v6.16b, v24.16b, v18.16b
-; CHECK-NEXT: uzp2 v1.8h, v4.8h, v3.8h
-; CHECK-NEXT: uzp2 v3.8h, v16.8h, v7.8h
-; CHECK-NEXT: uzp2 v2.8h, v6.8h, v5.8h
+; CHECK-NEXT: bit v5.16b, v25.16b, v26.16b
+; CHECK-NEXT: bif v3.16b, v20.16b, v6.16b
+; CHECK-NEXT: bsl v7.16b, v24.16b, v17.16b
+; CHECK-NEXT: bsl v16.16b, v28.16b, v18.16b
+; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: uzp2 v1.8h, v4.8h, v1.8h
+; CHECK-NEXT: uzp2 v2.8h, v7.8h, v5.8h
+; CHECK-NEXT: uzp2 v3.8h, v3.8h, v16.8h
; CHECK-NEXT: ret
entry:
%c = uitofp <32 x i64> %a to <32 x bfloat>
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index 81c1a64f2d434..07957c117868d 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -4421,22 +4421,42 @@ entry:
}
define <2 x float> @stofp_v2i64_v2f32(<2 x i64> %a) {
-; CHECK-LABEL: stofp_v2i64_v2f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: stofp_v2i64_v2f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov x8, v0.d[1]
+; CHECK-SD-NEXT: fmov x9, d0
+; CHECK-SD-NEXT: scvtf s0, x9
+; CHECK-SD-NEXT: scvtf s1, x8
+; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: stofp_v2i64_v2f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: scvtf v0.2d, v0.2d
+; CHECK-GI-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-GI-NEXT: ret
entry:
%c = sitofp <2 x i64> %a to <2 x float>
ret <2 x float> %c
}
define <2 x float> @utofp_v2i64_v2f32(<2 x i64> %a) {
-; CHECK-LABEL: utofp_v2i64_v2f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: utofp_v2i64_v2f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov x8, v0.d[1]
+; CHECK-SD-NEXT: fmov x9, d0
+; CHECK-SD-NEXT: ucvtf s0, x9
+; CHECK-SD-NEXT: ucvtf s1, x8
+; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: utofp_v2i64_v2f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-GI-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-GI-NEXT: ret
entry:
%c = uitofp <2 x i64> %a to <2 x float>
ret <2 x float> %c
@@ -4446,13 +4466,18 @@ define <3 x float> @stofp_v3i64_v3f32(<3 x i64> %a) {
; CHECK-SD-LABEL: stofp_v3i64_v3f32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: fmov x8, d0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: scvtf v1.2d, v2.2d
-; CHECK-SD-NEXT: scvtf v0.2d, v0.2d
-; CHECK-SD-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-SD-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: scvtf s3, x8
+; CHECK-SD-NEXT: fmov x8, d1
+; CHECK-SD-NEXT: scvtf s1, x8
+; CHECK-SD-NEXT: fmov x8, d2
+; CHECK-SD-NEXT: mov v0.s[0], v3.s[0]
+; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT: scvtf s1, x8
+; CHECK-SD-NEXT: mov v0.s[2], v1.s[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: stofp_v3i64_v3f32:
@@ -4478,13 +4503,18 @@ define <3 x float> @utofp_v3i64_v3f32(<3 x i64> %a) {
; CHECK-SD-LABEL: utofp_v3i64_v3f32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: fmov x8, d0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: ucvtf v1.2d, v2.2d
-; CHECK-SD-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-SD-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-SD-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ucvtf s3, x8
+; CHECK-SD-NEXT: fmov x8, d1
+; CHECK-SD-NEXT: ucvtf s1, x8
+; CHECK-SD-NEXT: fmov x8, d2
+; CHECK-SD-NEXT: mov v0.s[0], v3.s[0]
+; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT: ucvtf s1, x8
+; CHECK-SD-NEXT: mov v0.s[2], v1.s[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: utofp_v3i64_v3f32:
@@ -4507,26 +4537,56 @@ entry:
}
define <4 x float> @stofp_v4i64_v4f32(<4 x i64> %a) {
-; CHECK-LABEL: stofp_v4i64_v4f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: scvtf v1.2d, v1.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: stofp_v4i64_v4f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov x8, v0.d[1]
+; CHECK-SD-NEXT: fmov x9, d0
+; CHECK-SD-NEXT: scvtf s0, x9
+; CHECK-SD-NEXT: mov x9, v1.d[1]
+; CHECK-SD-NEXT: scvtf s2, x8
+; CHECK-SD-NEXT: fmov x8, d1
+; CHECK-SD-NEXT: scvtf s1, x8
+; CHECK-SD-NEXT: mov v0.s[1], v2.s[0]
+; CHECK-SD-NEXT: mov v0.s[2], v1.s[0]
+; CHECK-SD-NEXT: scvtf s1, x9
+; CHECK-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: stofp_v4i64_v4f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: scvtf v0.2d, v0.2d
+; CHECK-GI-NEXT: scvtf v1.2d, v1.2d
+; CHECK-GI-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-GI-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-GI-NEXT: ret
entry:
%c = sitofp <4 x i64> %a to <4 x float>
ret <4 x float> %c
}
define <4 x float> @utofp_v4i64_v4f32(<4 x i64> %a) {
-; CHECK-LABEL: utofp_v4i64_v4f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: ucvtf v1.2d, v1.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: utofp_v4i64_v4f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov x8, v0.d[1]
+; CHECK-SD-NEXT: fmov x9, d0
+; CHECK-SD-NEXT: ucvtf s0, x9
+; CHECK-SD-NEXT: mov x9, v1.d[1]
+; CHECK-SD-NEXT: ucvtf s2, x8
+; CHECK-SD-NEXT: fmov x8, d1
+; CHECK-SD-NEXT: ucvtf s1, x8
+; CHECK-SD-NEXT: mov v0.s[1], v2.s[0]
+; CHECK-SD-NEXT: mov v0.s[2], v1.s[0]
+; CHECK-SD-NEXT: ucvtf s1, x9
+; CHECK-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: utofp_v4i64_v4f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-GI-NEXT: ucvtf v1.2d, v1.2d
+; CHECK-GI-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-GI-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-GI-NEXT: ret
entry:
%c = uitofp <4 x i64> %a to <4 x float>
ret <4 x float> %c
@@ -4535,14 +4595,29 @@ entry:
define <8 x float> @stofp_v8i64_v8f32(<8 x i64> %a) {
; CHECK-SD-LABEL: stofp_v8i64_v8f32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: scvtf v0.2d, v0.2d
-; CHECK-SD-NEXT: scvtf v2.2d, v2.2d
-; CHECK-SD-NEXT: scvtf v4.2d, v1.2d
-; CHECK-SD-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-SD-NEXT: fcvtn v1.2s, v2.2d
-; CHECK-SD-NEXT: scvtf v2.2d, v3.2d
-; CHECK-SD-NEXT: fcvtn2 v0.4s, v4.2d
-; CHECK-SD-NEXT: fcvtn2 v1.4s, v2.2d
+; CHECK-SD-NEXT: mov x8, v0.d[1]
+; CHECK-SD-NEXT: mov x9, v2.d[1]
+; CHECK-SD-NEXT: fmov x10, d0
+; CHECK-SD-NEXT: fmov x11, d2
+; CHECK-SD-NEXT: scvtf s0, x10
+; CHECK-SD-NEXT: mov x10, v3.d[1]
+; CHECK-SD-NEXT: scvtf s4, x8
+; CHECK-SD-NEXT: scvtf s5, x9
+; CHECK-SD-NEXT: scvtf s2, x11
+; CHECK-SD-NEXT: fmov x9, d1
+; CHECK-SD-NEXT: fmov x11, d3
+; CHECK-SD-NEXT: mov x8, v1.d[1]
+; CHECK-SD-NEXT: scvtf s1, x9
+; CHECK-SD-NEXT: mov v0.s[1], v4.s[0]
+; CHECK-SD-NEXT: scvtf s3, x11
+; CHECK-SD-NEXT: mov v2.s[1], v5.s[0]
+; CHECK-SD-NEXT: scvtf s4, x8
+; CHECK-SD-NEXT: mov v0.s[2], v1.s[0]
+; CHECK-SD-NEXT: scvtf s1, x10
+; CHECK-SD-NEXT: mov v2.s[2], v3.s[0]
+; CHECK-SD-NEXT: mov v0.s[3], v4.s[0]
+; CHECK-SD-NEXT: mov v2.s[3], v1.s[0]
+; CHECK-SD-NEXT: mov v1.16b, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: stofp_v8i64_v8f32:
@@ -4564,14 +4639,29 @@ entry:
define <8 x float> @utofp_v8i64_v8f32(<8 x i64> %a) {
; CHECK-SD-LABEL: utofp_v8i64_v8f32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-SD-NEXT: ucvtf v2.2d, v2.2d
-; CHECK-SD-NEXT: ucvtf v4.2d, v1.2d
-; CHECK-SD-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-SD-NEXT: fcvtn v1.2s, v2.2d
-; CHECK-SD-NEXT: ucvtf v2.2d, v3.2d
-; CHECK-SD-NEXT: fcvtn2 v0.4s, v4.2d
-; CHECK-SD-NEXT: fcvtn2 v1.4s, v2.2d
+; CHECK-SD-NEXT: mov x8, v0.d[1]
+; CHECK-SD-NEXT: mov x9, v2.d[1]
+; CHECK-SD-NEXT: fmov x10, d0
+; CHECK-SD-NEXT: fmov x11, d2
+; CHECK-SD-NEXT: ucvtf s0, x10
+; CHECK-SD-NEXT: mov x10, v3.d[1]
+; CHECK-SD-NEXT: ucvtf s4, x8
+; CHECK-SD-NEXT: ucvtf s5, x9
+; CHECK-SD-NEXT: ucvtf s2, x11
+; CHECK-SD-NEXT: fmov x9, d1
+; CHECK-SD-NEXT: fmov x11, d3
+; CHECK-SD-NEXT: mov x8, v1.d[1]
+; CHECK-SD-NEXT: ucvtf s1, x9
+; CHECK-SD-NEXT: mov v0.s[1], v4.s[0]
+; CHECK-SD-NEXT: ucvtf s3, x11
+; CHECK-SD-NEXT: mov v2.s[1], v5.s[0]
+; CHECK-SD-NEXT: ucvtf s4, x8
+; CHECK-SD-NEXT: mov v0.s[2], v1.s[0]
+; CHECK-SD-NEXT: ucvtf s1, x10
+; CHECK-SD-NEXT: mov v2.s[2], v3.s[0]
+; CHECK-SD-NEXT: mov v0.s[3], v4.s[0]
+; CHECK-SD-NEXT: mov v2.s[3], v1.s[0]
+; CHECK-SD-NEXT: mov v1.16b, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: utofp_v8i64_v8f32:
@@ -4591,50 +4681,148 @@ entry:
}
define <16 x float> @stofp_v16i64_v16f32(<16 x i64> %a) {
-; CHECK-LABEL: stofp_v16i64_v16f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: scvtf v2.2d, v2.2d
-; CHECK-NEXT: scvtf v4.2d, v4.2d
-; CHECK-NEXT: scvtf v6.2d, v6.2d
-; CHECK-NEXT: scvtf v16.2d, v1.2d
-; CHECK-NEXT: scvtf v17.2d, v3.2d
-; CHECK-NEXT: scvtf v5.2d, v5.2d
-; CHECK-NEXT: scvtf v7.2d, v7.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn v1.2s, v2.2d
-; CHECK-NEXT: fcvtn v2.2s, v4.2d
-; CHECK-NEXT: fcvtn v3.2s, v6.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v16.2d
-; CHECK-NEXT: fcvtn2 v1.4s, v17.2d
-; CHECK-NEXT: fcvtn2 v2.4s, v5.2d
-; CHECK-NEXT: fcvtn2 v3.4s, v7.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: stofp_v16i64_v16f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov x13, d2
+; CHECK-SD-NEXT: mov x9, v0.d[1]
+; CHECK-SD-NEXT: mov x10, v2.d[1]
+; CHECK-SD-NEXT: fmov x11, d0
+; CHECK-SD-NEXT: mov x12, v4.d[1]
+; CHECK-SD-NEXT: mov x8, v1.d[1]
+; CHECK-SD-NEXT: scvtf s16, x13
+; CHECK-SD-NEXT: fmov x13, d4
+; CHECK-SD-NEXT: scvtf s0, x11
+; CHECK-SD-NEXT: mov x11, v6.d[1]
+; CHECK-SD-NEXT: scvtf s17, x9
+; CHECK-SD-NEXT: scvtf s18, x10
+; CHECK-SD-NEXT: fmov x9, d1
+; CHECK-SD-NEXT: scvtf s1, x12
+; CHECK-SD-NEXT: fmov x12, d6
+; CHECK-SD-NEXT: scvtf s2, x13
+; CHECK-SD-NEXT: fmov x13, d3
+; CHECK-SD-NEXT: mov x10, v3.d[1]
+; CHECK-SD-NEXT: scvtf s4, x11
+; CHECK-SD-NEXT: mov v0.s[1], v17.s[0]
+; CHECK-SD-NEXT: scvtf s6, x9
+; CHECK-SD-NEXT: scvtf s3, x12
+; CHECK-SD-NEXT: mov v16.s[1], v18.s[0]
+; CHECK-SD-NEXT: mov x9, v5.d[1]
+; CHECK-SD-NEXT: fmov x11, d5
+; CHECK-SD-NEXT: scvtf s5, x13
+; CHECK-SD-NEXT: fmov x13, d7
+; CHECK-SD-NEXT: mov x12, v7.d[1]
+; CHECK-SD-NEXT: mov v2.s[1], v1.s[0]
+; CHECK-SD-NEXT: mov v0.s[2], v6.s[0]
+; CHECK-SD-NEXT: scvtf s6, x10
+; CHECK-SD-NEXT: scvtf s7, x11
+; CHECK-SD-NEXT: scvtf s1, x13
+; CHECK-SD-NEXT: mov v3.s[1], v4.s[0]
+; CHECK-SD-NEXT: mov v16.s[2], v5.s[0]
+; CHECK-SD-NEXT: scvtf s4, x8
+; CHECK-SD-NEXT: scvtf s5, x9
+; CHECK-SD-NEXT: mov v2.s[2], v7.s[0]
+; CHECK-SD-NEXT: mov v3.s[2], v1.s[0]
+; CHECK-SD-NEXT: scvtf s1, x12
+; CHECK-SD-NEXT: mov v16.s[3], v6.s[0]
+; CHECK-SD-NEXT: mov v0.s[3], v4.s[0]
+; CHECK-SD-NEXT: mov v2.s[3], v5.s[0]
+; CHECK-SD-NEXT: mov v3.s[3], v1.s[0]
+; CHECK-SD-NEXT: mov v1.16b, v16.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: stofp_v16i64_v16f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: scvtf v0.2d, v0.2d
+; CHECK-GI-NEXT: scvtf v2.2d, v2.2d
+; CHECK-GI-NEXT: scvtf v4.2d, v4.2d
+; CHECK-GI-NEXT: scvtf v6.2d, v6.2d
+; CHECK-GI-NEXT: scvtf v16.2d, v1.2d
+; CHECK-GI-NEXT: scvtf v17.2d, v3.2d
+; CHECK-GI-NEXT: scvtf v5.2d, v5.2d
+; CHECK-GI-NEXT: scvtf v7.2d, v7.2d
+; CHECK-GI-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-GI-NEXT: fcvtn v1.2s, v2.2d
+; CHECK-GI-NEXT: fcvtn v2.2s, v4.2d
+; CHECK-GI-NEXT: fcvtn v3.2s, v6.2d
+; CHECK-GI-NEXT: fcvtn2 v0.4s, v16.2d
+; CHECK-GI-NEXT: fcvtn2 v1.4s, v17.2d
+; CHECK-GI-NEXT: fcvtn2 v2.4s, v5.2d
+; CHECK-GI-NEXT: fcvtn2 v3.4s, v7.2d
+; CHECK-GI-NEXT: ret
entry:
%c = sitofp <16 x i64> %a to <16 x float>
ret <16 x float> %c
}
define <16 x float> @utofp_v16i64_v16f32(<16 x i64> %a) {
-; CHECK-LABEL: utofp_v16i64_v16f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: ucvtf v2.2d, v2.2d
-; CHECK-NEXT: ucvtf v4.2d, v4.2d
-; CHECK-NEXT: ucvtf v6.2d, v6.2d
-; CHECK-NEXT: ucvtf v16.2d, v1.2d
-; CHECK-NEXT: ucvtf v17.2d, v3.2d
-; CHECK-NEXT: ucvtf v5.2d, v5.2d
-; CHECK-NEXT: ucvtf v7.2d, v7.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn v1.2s, v2.2d
-; CHECK-NEXT: fcvtn v2.2s, v4.2d
-; CHECK-NEXT: fcvtn v3.2s, v6.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v16.2d
-; CHECK-NEXT: fcvtn2 v1.4s, v17.2d
-; CHECK-NEXT: fcvtn2 v2.4s, v5.2d
-; CHECK-NEXT: fcvtn2 v3.4s, v7.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: utofp_v16i64_v16f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov x13, d2
+; CHECK-SD-NEXT: mov x9, v0.d[1]
+; CHECK-SD-NEXT: mov x10, v2.d[1]
+; CHECK-SD-NEXT: fmov x11, d0
+; CHECK-SD-NEXT: mov x12, v4.d[1]
+; CHECK-SD-NEXT: mov x8, v1.d[1]
+; CHECK-SD-NEXT: ucvtf s16, x13
+; CHECK-SD-NEXT: fmov x13, d4
+; CHECK-SD-NEXT: ucvtf s0, x11
+; CHECK-SD-NEXT: mov x11, v6.d[1]
+; CHECK-SD-NEXT: ucvtf s17, x9
+; CHECK-SD-NEXT: ucvtf s18, x10
+; CHECK-SD-NEXT: fmov x9, d1
+; CHECK-SD-NEXT: ucvtf s1, x12
+; CHECK-SD-NEXT: fmov x12, d6
+; CHECK-SD-NEXT: ucvtf s2, x13
+; CHECK-SD-NEXT: fmov x13, d3
+; CHECK-SD-NEXT: mov x10, v3.d[1]
+; CHECK-SD-NEXT: ucvtf s4, x11
+; CHECK-SD-NEXT: mov v0.s[1], v17.s[0]
+; CHECK-SD-NEXT: ucvtf s6, x9
+; CHECK-SD-NEXT: ucvtf s3, x12
+; CHECK-SD-NEXT: mov v16.s[1], v18.s[0]
+; CHECK-SD-NEXT: mov x9, v5.d[1]
+; CHECK-SD-NEXT: fmov x11, d5
+; CHECK-SD-NEXT: ucvtf s5, x13
+; CHECK-SD-NEXT: fmov x13, d7
+; CHECK-SD-NEXT: mov x12, v7.d[1]
+; CHECK-SD-NEXT: mov v2.s[1], v1.s[0]
+; CHECK-SD-NEXT: mov v0.s[2], v6.s[0]
+; CHECK-SD-NEXT: ucvtf s6, x10
+; CHECK-SD-NEXT: ucvtf s7, x11
+; CHECK-SD-NEXT: ucvtf s1, x13
+; CHECK-SD-NEXT: mov v3.s[1], v4.s[0]
+; CHECK-SD-NEXT: mov v16.s[2], v5.s[0]
+; CHECK-SD-NEXT: ucvtf s4, x8
+; CHECK-SD-NEXT: ucvtf s5, x9
+; CHECK-SD-NEXT: mov v2.s[2], v7.s[0]
+; CHECK-SD-NEXT: mov v3.s[2], v1.s[0]
+; CHECK-SD-NEXT: ucvtf s1, x12
+; CHECK-SD-NEXT: mov v16.s[3], v6.s[0]
+; CHECK-SD-NEXT: mov v0.s[3], v4.s[0]
+; CHECK-SD-NEXT: mov v2.s[3], v5.s[0]
+; CHECK-SD-NEXT: mov v3.s[3], v1.s[0]
+; CHECK-SD-NEXT: mov v1.16b, v16.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: utofp_v16i64_v16f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-GI-NEXT: ucvtf v2.2d, v2.2d
+; CHECK-GI-NEXT: ucvtf v4.2d, v4.2d
+; CHECK-GI-NEXT: ucvtf v6.2d, v6.2d
+; CHECK-GI-NEXT: ucvtf v16.2d, v1.2d
+; CHECK-GI-NEXT: ucvtf v17.2d, v3.2d
+; CHECK-GI-NEXT: ucvtf v5.2d, v5.2d
+; CHECK-GI-NEXT: ucvtf v7.2d, v7.2d
+; CHECK-GI-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-GI-NEXT: fcvtn v1.2s, v2.2d
+; CHECK-GI-NEXT: fcvtn v2.2s, v4.2d
+; CHECK-GI-NEXT: fcvtn v3.2s, v6.2d
+; CHECK-GI-NEXT: fcvtn2 v0.4s, v16.2d
+; CHECK-GI-NEXT: fcvtn2 v1.4s, v17.2d
+; CHECK-GI-NEXT: fcvtn2 v2.4s, v5.2d
+; CHECK-GI-NEXT: fcvtn2 v3.4s, v7.2d
+; CHECK-GI-NEXT: ret
entry:
%c = uitofp <16 x i64> %a to <16 x float>
ret <16 x float> %c
@@ -4643,42 +4831,99 @@ entry:
define <32 x float> @stofp_v32i64_v32f32(<32 x i64> %a) {
; CHECK-SD-LABEL: stofp_v32i64_v32f32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ldp q17, q16, [sp, #64]
-; CHECK-SD-NEXT: scvtf v0.2d, v0.2d
-; CHECK-SD-NEXT: ldp q19, q18, [sp, #32]
-; CHECK-SD-NEXT: scvtf v2.2d, v2.2d
-; CHECK-SD-NEXT: ldp q21, q20, [sp]
-; CHECK-SD-NEXT: scvtf v4.2d, v4.2d
-; CHECK-SD-NEXT: ldp q23, q22, [sp, #96]
-; CHECK-SD-NEXT: scvtf v6.2d, v6.2d
-; CHECK-SD-NEXT: scvtf v19.2d, v19.2d
-; CHECK-SD-NEXT: scvtf v17.2d, v17.2d
-; CHECK-SD-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-SD-NEXT: scvtf v21.2d, v21.2d
-; CHECK-SD-NEXT: scvtf v24.2d, v1.2d
-; CHECK-SD-NEXT: fcvtn v1.2s, v2.2d
-; CHECK-SD-NEXT: scvtf v23.2d, v23.2d
-; CHECK-SD-NEXT: scvtf v25.2d, v3.2d
-; CHECK-SD-NEXT: fcvtn v2.2s, v4.2d
-; CHECK-SD-NEXT: scvtf v26.2d, v5.2d
-; CHECK-SD-NEXT: fcvtn v3.2s, v6.2d
-; CHECK-SD-NEXT: scvtf v27.2d, v7.2d
-; CHECK-SD-NEXT: scvtf v20.2d, v20.2d
-; CHECK-SD-NEXT: fcvtn v5.2s, v19.2d
-; CHECK-SD-NEXT: scvtf v18.2d, v18.2d
-; CHECK-SD-NEXT: fcvtn v4.2s, v21.2d
-; CHECK-SD-NEXT: fcvtn v6.2s, v17.2d
-; CHECK-SD-NEXT: scvtf v16.2d, v16.2d
-; CHECK-SD-NEXT: fcvtn v7.2s, v23.2d
-; CHECK-SD-NEXT: scvtf v17.2d, v22.2d
-; CHECK-SD-NEXT: fcvtn2 v0.4s, v24.2d
-; CHECK-SD-NEXT: fcvtn2 v1.4s, v25.2d
-; CHECK-SD-NEXT: fcvtn2 v2.4s, v26.2d
-; CHECK-SD-NEXT: fcvtn2 v3.4s, v27.2d
-; CHECK-SD-NEXT: fcvtn2 v5.4s, v18.2d
-; CHECK-SD-NEXT: fcvtn2 v4.4s, v20.2d
-; CHECK-SD-NEXT: fcvtn2 v6.4s, v16.2d
-; CHECK-SD-NEXT: fcvtn2 v7.4s, v17.2d
+; CHECK-SD-NEXT: mov x8, v0.d[1]
+; CHECK-SD-NEXT: fmov x10, d0
+; CHECK-SD-NEXT: mov v16.16b, v1.16b
+; CHECK-SD-NEXT: fmov x11, d2
+; CHECK-SD-NEXT: ldp q24, q20, [sp]
+; CHECK-SD-NEXT: mov x9, v2.d[1]
+; CHECK-SD-NEXT: fmov x12, d3
+; CHECK-SD-NEXT: fmov x13, d4
+; CHECK-SD-NEXT: scvtf s0, x10
+; CHECK-SD-NEXT: ldp q21, q18, [sp, #32]
+; CHECK-SD-NEXT: scvtf s2, x8
+; CHECK-SD-NEXT: scvtf s1, x11
+; CHECK-SD-NEXT: mov x10, v4.d[1]
+; CHECK-SD-NEXT: fmov x11, d16
+; CHECK-SD-NEXT: ldp q19, q17, [sp, #96]
+; CHECK-SD-NEXT: scvtf s22, x9
+; CHECK-SD-NEXT: mov x8, v3.d[1]
+; CHECK-SD-NEXT: scvtf s4, x12
+; CHECK-SD-NEXT: mov x12, v24.d[1]
+; CHECK-SD-NEXT: mov x9, v16.d[1]
+; CHECK-SD-NEXT: scvtf s3, x11
+; CHECK-SD-NEXT: ldp q23, q16, [sp, #64]
+; CHECK-SD-NEXT: mov v0.s[1], v2.s[0]
+; CHECK-SD-NEXT: scvtf s25, x10
+; CHECK-SD-NEXT: fmov x10, d6
+; CHECK-SD-NEXT: mov v1.s[1], v22.s[0]
+; CHECK-SD-NEXT: mov x11, v6.d[1]
+; CHECK-SD-NEXT: scvtf s2, x13
+; CHECK-SD-NEXT: mov x13, v21.d[1]
+; CHECK-SD-NEXT: fmov x14, d19
+; CHECK-SD-NEXT: scvtf s22, x9
+; CHECK-SD-NEXT: mov x9, v5.d[1]
+; CHECK-SD-NEXT: fmov x15, d17
+; CHECK-SD-NEXT: mov v0.s[2], v3.s[0]
+; CHECK-SD-NEXT: scvtf s3, x10
+; CHECK-SD-NEXT: fmov x10, d24
+; CHECK-SD-NEXT: mov v1.s[2], v4.s[0]
+; CHECK-SD-NEXT: scvtf s24, x12
+; CHECK-SD-NEXT: scvtf s6, x11
+; CHECK-SD-NEXT: fmov x11, d5
+; CHECK-SD-NEXT: fmov x12, d7
+; CHECK-SD-NEXT: mov v2.s[1], v25.s[0]
+; CHECK-SD-NEXT: scvtf s4, x10
+; CHECK-SD-NEXT: fmov x10, d21
+; CHECK-SD-NEXT: scvtf s21, x8
+; CHECK-SD-NEXT: mov x8, v23.d[1]
+; CHECK-SD-NEXT: scvtf s25, x13
+; CHECK-SD-NEXT: mov x13, v19.d[1]
+; CHECK-SD-NEXT: scvtf s26, x11
+; CHECK-SD-NEXT: mov x11, v20.d[1]
+; CHECK-SD-NEXT: mov v3.s[1], v6.s[0]
+; CHECK-SD-NEXT: scvtf s5, x10
+; CHECK-SD-NEXT: mov x10, v7.d[1]
+; CHECK-SD-NEXT: scvtf s7, x14
+; CHECK-SD-NEXT: mov v4.s[1], v24.s[0]
+; CHECK-SD-NEXT: scvtf s24, x12
+; CHECK-SD-NEXT: fmov x12, d20
+; CHECK-SD-NEXT: scvtf s20, x8
+; CHECK-SD-NEXT: fmov x8, d23
+; CHECK-SD-NEXT: scvtf s19, x13
+; CHECK-SD-NEXT: fmov x13, d18
+; CHECK-SD-NEXT: fmov x14, d16
+; CHECK-SD-NEXT: mov v2.s[2], v26.s[0]
+; CHECK-SD-NEXT: mov v5.s[1], v25.s[0]
+; CHECK-SD-NEXT: scvtf s23, x10
+; CHECK-SD-NEXT: mov v0.s[3], v22.s[0]
+; CHECK-SD-NEXT: scvtf s6, x8
+; CHECK-SD-NEXT: mov x8, v18.d[1]
+; CHECK-SD-NEXT: scvtf s18, x12
+; CHECK-SD-NEXT: mov x12, v16.d[1]
+; CHECK-SD-NEXT: scvtf s16, x13
+; CHECK-SD-NEXT: mov x13, v17.d[1]
+; CHECK-SD-NEXT: scvtf s17, x14
+; CHECK-SD-NEXT: mov v7.s[1], v19.s[0]
+; CHECK-SD-NEXT: scvtf s19, x9
+; CHECK-SD-NEXT: mov v3.s[2], v24.s[0]
+; CHECK-SD-NEXT: scvtf s24, x11
+; CHECK-SD-NEXT: mov v1.s[3], v21.s[0]
+; CHECK-SD-NEXT: mov v6.s[1], v20.s[0]
+; CHECK-SD-NEXT: scvtf s20, x15
+; CHECK-SD-NEXT: mov v4.s[2], v18.s[0]
+; CHECK-SD-NEXT: scvtf s18, x8
+; CHECK-SD-NEXT: mov v5.s[2], v16.s[0]
+; CHECK-SD-NEXT: scvtf s16, x12
+; CHECK-SD-NEXT: mov v2.s[3], v19.s[0]
+; CHECK-SD-NEXT: mov v3.s[3], v23.s[0]
+; CHECK-SD-NEXT: mov v6.s[2], v17.s[0]
+; CHECK-SD-NEXT: mov v7.s[2], v20.s[0]
+; CHECK-SD-NEXT: scvtf s17, x13
+; CHECK-SD-NEXT: mov v4.s[3], v24.s[0]
+; CHECK-SD-NEXT: mov v5.s[3], v18.s[0]
+; CHECK-SD-NEXT: mov v6.s[3], v16.s[0]
+; CHECK-SD-NEXT: mov v7.s[3], v17.s[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: stofp_v32i64_v32f32:
@@ -4728,42 +4973,99 @@ entry:
define <32 x float> @utofp_v32i64_v32f32(<32 x i64> %a) {
; CHECK-SD-LABEL: utofp_v32i64_v32f32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ldp q17, q16, [sp, #64]
-; CHECK-SD-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-SD-NEXT: ldp q19, q18, [sp, #32]
-; CHECK-SD-NEXT: ucvtf v2.2d, v2.2d
-; CHECK-SD-NEXT: ldp q21, q20, [sp]
-; CHECK-SD-NEXT: ucvtf v4.2d, v4.2d
-; CHECK-SD-NEXT: ldp q23, q22, [sp, #96]
-; CHECK-SD-NEXT: ucvtf v6.2d, v6.2d
-; CHECK-SD-NEXT: ucvtf v19.2d, v19.2d
-; CHECK-SD-NEXT: ucvtf v17.2d, v17.2d
-; CHECK-SD-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-SD-NEXT: ucvtf v21.2d, v21.2d
-; CHECK-SD-NEXT: ucvtf v24.2d, v1.2d
-; CHECK-SD-NEXT: fcvtn v1.2s, v2.2d
-; CHECK-SD-NEXT: ucvtf v23.2d, v23.2d
-; CHECK-SD-NEXT: ucvtf v25.2d, v3.2d
-; CHECK-SD-NEXT: fcvtn v2.2s, v4.2d
-; CHECK-SD-NEXT: ucvtf v26.2d, v5.2d
-; CHECK-SD-NEXT: fcvtn v3.2s, v6.2d
-; CHECK-SD-NEXT: ucvtf v27.2d, v7.2d
-; CHECK-SD-NEXT: ucvtf v20.2d, v20.2d
-; CHECK-SD-NEXT: fcvtn v5.2s, v19.2d
-; CHECK-SD-NEXT: ucvtf v18.2d, v18.2d
-; CHECK-SD-NEXT: fcvtn v4.2s, v21.2d
-; CHECK-SD-NEXT: fcvtn v6.2s, v17.2d
-; CHECK-SD-NEXT: ucvtf v16.2d, v16.2d
-; CHECK-SD-NEXT: fcvtn v7.2s, v23.2d
-; CHECK-SD-NEXT: ucvtf v17.2d, v22.2d
-; CHECK-SD-NEXT: fcvtn2 v0.4s, v24.2d
-; CHECK-SD-NEXT: fcvtn2 v1.4s, v25.2d
-; CHECK-SD-NEXT: fcvtn2 v2.4s, v26.2d
-; CHECK-SD-NEXT: fcvtn2 v3.4s, v27.2d
-; CHECK-SD-NEXT: fcvtn2 v5.4s, v18.2d
-; CHECK-SD-NEXT: fcvtn2 v4.4s, v20.2d
-; CHECK-SD-NEXT: fcvtn2 v6.4s, v16.2d
-; CHECK-SD-NEXT: fcvtn2 v7.4s, v17.2d
+; CHECK-SD-NEXT: mov x8, v0.d[1]
+; CHECK-SD-NEXT: fmov x10, d0
+; CHECK-SD-NEXT: mov v16.16b, v1.16b
+; CHECK-SD-NEXT: fmov x11, d2
+; CHECK-SD-NEXT: ldp q24, q20, [sp]
+; CHECK-SD-NEXT: mov x9, v2.d[1]
+; CHECK-SD-NEXT: fmov x12, d3
+; CHECK-SD-NEXT: fmov x13, d4
+; CHECK-SD-NEXT: ucvtf s0, x10
+; CHECK-SD-NEXT: ldp q21, q18, [sp, #32]
+; CHECK-SD-NEXT: ucvtf s2, x8
+; CHECK-SD-NEXT: ucvtf s1, x11
+; CHECK-SD-NEXT: mov x10, v4.d[1]
+; CHECK-SD-NEXT: fmov x11, d16
+; CHECK-SD-NEXT: ldp q19, q17, [sp, #96]
+; CHECK-SD-NEXT: ucvtf s22, x9
+; CHECK-SD-NEXT: mov x8, v3.d[1]
+; CHECK-SD-NEXT: ucvtf s4, x12
+; CHECK-SD-NEXT: mov x12, v24.d[1]
+; CHECK-SD-NEXT: mov x9, v16.d[1]
+; CHECK-SD-NEXT: ucvtf s3, x11
+; CHECK-SD-NEXT: ldp q23, q16, [sp, #64]
+; CHECK-SD-NEXT: mov v0.s[1], v2.s[0]
+; CHECK-SD-NEXT: ucvtf s25, x10
+; CHECK-SD-NEXT: fmov x10, d6
+; CHECK-SD-NEXT: mov v1.s[1], v22.s[0]
+; CHECK-SD-NEXT: mov x11, v6.d[1]
+; CHECK-SD-NEXT: ucvtf s2, x13
+; CHECK-SD-NEXT: mov x13, v21.d[1]
+; CHECK-SD-NEXT: fmov x14, d19
+; CHECK-SD-NEXT: ucvtf s22, x9
+; CHECK-SD-NEXT: mov x9, v5.d[1]
+; CHECK-SD-NEXT: fmov x15, d17
+; CHECK-SD-NEXT: mov v0.s[2], v3.s[0]
+; CHECK-SD-NEXT: ucvtf s3, x10
+; CHECK-SD-NEXT: fmov x10, d24
+; CHECK-SD-NEXT: mov v1.s[2], v4.s[0]
+; CHECK-SD-NEXT: ucvtf s24, x12
+; CHECK-SD-NEXT: ucvtf s6, x11
+; CHECK-SD-NEXT: fmov x11, d5
+; CHECK-SD-NEXT: fmov x12, d7
+; CHECK-SD-NEXT: mov v2.s[1], v25.s[0]
+; CHECK-SD-NEXT: ucvtf s4, x10
+; CHECK-SD-NEXT: fmov x10, d21
+; CHECK-SD-NEXT: ucvtf s21, x8
+; CHECK-SD-NEXT: mov x8, v23.d[1]
+; CHECK-SD-NEXT: ucvtf s25, x13
+; CHECK-SD-NEXT: mov x13, v19.d[1]
+; CHECK-SD-NEXT: ucvtf s26, x11
+; CHECK-SD-NEXT: mov x11, v20.d[1]
+; CHECK-SD-NEXT: mov v3.s[1], v6.s[0]
+; CHECK-SD-NEXT: ucvtf s5, x10
+; CHECK-SD-NEXT: mov x10, v7.d[1]
+; CHECK-SD-NEXT: ucvtf s7, x14
+; CHECK-SD-NEXT: mov v4.s[1], v24.s[0]
+; CHECK-SD-NEXT: ucvtf s24, x12
+; CHECK-SD-NEXT: fmov x12, d20
+; CHECK-SD-NEXT: ucvtf s20, x8
+; CHECK-SD-NEXT: fmov x8, d23
+; CHECK-SD-NEXT: ucvtf s19, x13
+; CHECK-SD-NEXT: fmov x13, d18
+; CHECK-SD-NEXT: fmov x14, d16
+; CHECK-SD-NEXT: mov v2.s[2], v26.s[0]
+; CHECK-SD-NEXT: mov v5.s[1], v25.s[0]
+; CHECK-SD-NEXT: ucvtf s23, x10
+; CHECK-SD-NEXT: mov v0.s[3], v22.s[0]
+; CHECK-SD-NEXT: ucvtf s6, x8
+; CHECK-SD-NEXT: mov x8, v18.d[1]
+; CHECK-SD-NEXT: ucvtf s18, x12
+; CHECK-SD-NEXT: mov x12, v16.d[1]
+; CHECK-SD-NEXT: ucvtf s16, x13
+; CHECK-SD-NEXT: mov x13, v17.d[1]
+; CHECK-SD-NEXT: ucvtf s17, x14
+; CHECK-SD-NEXT: mov v7.s[1], v19.s[0]
+; CHECK-SD-NEXT: ucvtf s19, x9
+; CHECK-SD-NEXT: mov v3.s[2], v24.s[0]
+; CHECK-SD-NEXT: ucvtf s24, x11
+; CHECK-SD-NEXT: mov v1.s[3], v21.s[0]
+; CHECK-SD-NEXT: mov v6.s[1], v20.s[0]
+; CHECK-SD-NEXT: ucvtf s20, x15
+; CHECK-SD-NEXT: mov v4.s[2], v18.s[0]
+; CHECK-SD-NEXT: ucvtf s18, x8
+; CHECK-SD-NEXT: mov v5.s[2], v16.s[0]
+; CHECK-SD-NEXT: ucvtf s16, x12
+; CHECK-SD-NEXT: mov v2.s[3], v19.s[0]
+; CHECK-SD-NEXT: mov v3.s[3], v23.s[0]
+; CHECK-SD-NEXT: mov v6.s[2], v17.s[0]
+; CHECK-SD-NEXT: mov v7.s[2], v20.s[0]
+; CHECK-SD-NEXT: ucvtf s17, x13
+; CHECK-SD-NEXT: mov v4.s[3], v24.s[0]
+; CHECK-SD-NEXT: mov v5.s[3], v18.s[0]
+; CHECK-SD-NEXT: mov v6.s[3], v16.s[0]
+; CHECK-SD-NEXT: mov v7.s[3], v17.s[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: utofp_v32i64_v32f32:
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll
index 573fe3d8b8a77..1d9e01f4ecfdf 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll
@@ -722,8 +722,11 @@ define <1 x float> @ucvtf_v1i64_v1f32(<1 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v1i64_v1f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: movi d1, #0000000000000000
+; CHECK-NEXT: ucvtf s0, x8
+; CHECK-NEXT: mov v1.s[0], v0.s[0]
+; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%res = uitofp <1 x i64> %op1 to <1 x float>
ret <1 x float> %res
@@ -733,8 +736,12 @@ define <1 x float> @ucvtf_v1i64_v1f32(<1 x i64> %op1) vscale_range(2,0) #0 {
define <2 x float> @ucvtf_v2i64_v2f32(<2 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v2i64_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: mov x8, v0.d[1]
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: ucvtf s0, x9
+; CHECK-NEXT: ucvtf s1, x8
+; CHECK-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%res = uitofp <2 x i64> %op1 to <2 x float>
ret <2 x float> %res
@@ -1646,8 +1653,11 @@ define <1 x float> @scvtf_v1i64_v1f32(<1 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v1i64_v1f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: movi d1, #0000000000000000
+; CHECK-NEXT: scvtf s0, x8
+; CHECK-NEXT: mov v1.s[0], v0.s[0]
+; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%res = sitofp <1 x i64> %op1 to <1 x float>
ret <1 x float> %res
@@ -1657,8 +1667,12 @@ define <1 x float> @scvtf_v1i64_v1f32(<1 x i64> %op1) vscale_range(2,0) #0 {
define <2 x float> @scvtf_v2i64_v2f32(<2 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v2i64_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: mov x8, v0.d[1]
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: scvtf s0, x9
+; CHECK-NEXT: scvtf s1, x8
+; CHECK-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%res = sitofp <2 x i64> %op1 to <2 x float>
ret <2 x float> %res
diff --git a/llvm/test/CodeGen/AArch64/vector-fcvt.ll b/llvm/test/CodeGen/AArch64/vector-fcvt.ll
index 8f38bdbedc629..a6b43d514594e 100644
--- a/llvm/test/CodeGen/AArch64/vector-fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/vector-fcvt.ll
@@ -87,14 +87,29 @@ define <8 x float> @sitofp_i32_float(<8 x i32> %a) {
define <8 x float> @sitofp_i64_float(<8 x i64> %a) {
; CHECK-LABEL: sitofp_i64_float:
; CHECK: // %bb.0:
-; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: scvtf v2.2d, v2.2d
-; CHECK-NEXT: scvtf v4.2d, v1.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn v1.2s, v2.2d
-; CHECK-NEXT: scvtf v2.2d, v3.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v4.2d
-; CHECK-NEXT: fcvtn2 v1.4s, v2.2d
+; CHECK-NEXT: mov x8, v0.d[1]
+; CHECK-NEXT: mov x9, v2.d[1]
+; CHECK-NEXT: fmov x10, d0
+; CHECK-NEXT: fmov x11, d2
+; CHECK-NEXT: scvtf s0, x10
+; CHECK-NEXT: mov x10, v3.d[1]
+; CHECK-NEXT: scvtf s4, x8
+; CHECK-NEXT: scvtf s5, x9
+; CHECK-NEXT: scvtf s2, x11
+; CHECK-NEXT: fmov x9, d1
+; CHECK-NEXT: fmov x11, d3
+; CHECK-NEXT: mov x8, v1.d[1]
+; CHECK-NEXT: scvtf s1, x9
+; CHECK-NEXT: mov v0.s[1], v4.s[0]
+; CHECK-NEXT: scvtf s3, x11
+; CHECK-NEXT: mov v2.s[1], v5.s[0]
+; CHECK-NEXT: scvtf s4, x8
+; CHECK-NEXT: mov v0.s[2], v1.s[0]
+; CHECK-NEXT: scvtf s1, x10
+; CHECK-NEXT: mov v2.s[2], v3.s[0]
+; CHECK-NEXT: mov v0.s[3], v4.s[0]
+; CHECK-NEXT: mov v2.s[3], v1.s[0]
+; CHECK-NEXT: mov v1.16b, v2.16b
; CHECK-NEXT: ret
%1 = sitofp <8 x i64> %a to <8 x float>
ret <8 x float> %1
@@ -177,14 +192,29 @@ define <8 x float> @uitofp_i32_float(<8 x i32> %a) {
define <8 x float> @uitofp_i64_float(<8 x i64> %a) {
; CHECK-LABEL: uitofp_i64_float:
; CHECK: // %bb.0:
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: ucvtf v2.2d, v2.2d
-; CHECK-NEXT: ucvtf v4.2d, v1.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn v1.2s, v2.2d
-; CHECK-NEXT: ucvtf v2.2d, v3.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v4.2d
-; CHECK-NEXT: fcvtn2 v1.4s, v2.2d
+; CHECK-NEXT: mov x8, v0.d[1]
+; CHECK-NEXT: mov x9, v2.d[1]
+; CHECK-NEXT: fmov x10, d0
+; CHECK-NEXT: fmov x11, d2
+; CHECK-NEXT: ucvtf s0, x10
+; CHECK-NEXT: mov x10, v3.d[1]
+; CHECK-NEXT: ucvtf s4, x8
+; CHECK-NEXT: ucvtf s5, x9
+; CHECK-NEXT: ucvtf s2, x11
+; CHECK-NEXT: fmov x9, d1
+; CHECK-NEXT: fmov x11, d3
+; CHECK-NEXT: mov x8, v1.d[1]
+; CHECK-NEXT: ucvtf s1, x9
+; CHECK-NEXT: mov v0.s[1], v4.s[0]
+; CHECK-NEXT: ucvtf s3, x11
+; CHECK-NEXT: mov v2.s[1], v5.s[0]
+; CHECK-NEXT: ucvtf s4, x8
+; CHECK-NEXT: mov v0.s[2], v1.s[0]
+; CHECK-NEXT: ucvtf s1, x10
+; CHECK-NEXT: mov v2.s[2], v3.s[0]
+; CHECK-NEXT: mov v0.s[3], v4.s[0]
+; CHECK-NEXT: mov v2.s[3], v1.s[0]
+; CHECK-NEXT: mov v1.16b, v2.16b
; CHECK-NEXT: ret
%1 = uitofp <8 x i64> %a to <8 x float>
ret <8 x float> %1
More information about the llvm-commits
mailing list