[llvm] [AArch64] Extend v2i64 fptosi.sat to v2f64 (PR #91714)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon May 13 00:33:00 PDT 2024
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/91714
From e75687aa56725c092765a5960e588a4089298ecf Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 13 May 2024 08:32:44 +0100
Subject: [PATCH] [AArch64] Extend v2i64 fptosi.sat to v2f64
This lets the backend produce a single instruction for the saturate, as
opposed to having to scalarize the conversion.
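For example, a v2f32 -> v2i64 saturating conversion such as the one below
(a minimal sketch of the pattern exercised by the tests in this patch; the
function name is illustrative):

  define <2 x i64> @sat_v2f32_v2i64(<2 x float> %f) {
    %x = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %f)
    ret <2 x i64> %x
  }

previously lowered to per-lane fcvtzs/fmov/mov instructions, and now emits
fcvtl v0.2d, v0.2s followed by a single fcvtzs v0.2d, v0.2d.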
---
.../Target/AArch64/AArch64ISelLowering.cpp | 9 +
llvm/test/CodeGen/AArch64/fcvt_combine.ll | 7 +-
llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll | 16 +-
.../test/CodeGen/AArch64/fptosi-sat-vector.ll | 23 +-
.../test/CodeGen/AArch64/fptoui-sat-vector.ll | 23 +-
.../AArch64/sve-fixed-vector-llrint.ll | 7 +-
.../CodeGen/AArch64/sve-fixed-vector-lrint.ll | 7 +-
llvm/test/CodeGen/AArch64/vector-llrint.ll | 283 ++++++------------
llvm/test/CodeGen/AArch64/vector-lrint.ll | 283 ++++++------------
9 files changed, 221 insertions(+), 437 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a6c020c6b823d..1e0071fffe666 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4286,6 +4286,15 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
return SDValue();
SDLoc DL(Op);
+  // Expand to f64 if we are saturating to i64, to keep the lanes the same
+  // width and produce an fcvtzs/fcvtzu.
+ if (SatWidth == 64 && SrcElementWidth < 64) {
+ MVT F64VT = MVT::getVectorVT(MVT::f64, SrcVT.getVectorNumElements());
+ SrcVal = DAG.getNode(ISD::FP_EXTEND, DL, F64VT, SrcVal);
+ SrcVT = F64VT;
+ SrcElementVT = MVT::f64;
+ SrcElementWidth = 64;
+ }
// Cases that we can emit directly.
if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
diff --git a/llvm/test/CodeGen/AArch64/fcvt_combine.ll b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
index 29170aab96566..62669a6d99eae 100644
--- a/llvm/test/CodeGen/AArch64/fcvt_combine.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
@@ -345,11 +345,8 @@ define <2 x i64> @test6_sat(<2 x float> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: fmov v1.2s, #16.00000000
; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
%vcvt.i = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %mul.i)
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
index 2ea581359af6f..4e8bfcd9d7516 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -436,12 +436,8 @@ entry:
define <2 x i64> @stest_f32i64(<2 x float> %x) {
; CHECK-LABEL: stest_f32i64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
@@ -1056,12 +1052,8 @@ entry:
define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-LABEL: stest_f32i64_mm:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index c45885a38f159..d620a8851ee44 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -793,12 +793,8 @@ define <2 x i50> @test_signed_v2f32_v2i50(<2 x float> %f) {
define <2 x i64> @test_signed_v2f32_v2i64(<2 x float> %f) {
; CHECK-LABEL: test_signed_v2f32_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
%x = call <2 x i64> @llvm.fptosi.sat.v2f32.v2i64(<2 x float> %f)
ret <2 x i64> %x
@@ -1060,17 +1056,10 @@ define <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) {
define <4 x i64> @test_signed_v4f32_v4i64(<4 x float> %f) {
; CHECK-LABEL: test_signed_v4f32_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: mov s3, v0.s[1]
-; CHECK-NEXT: fcvtzs x9, s0
-; CHECK-NEXT: mov s2, v1.s[1]
-; CHECK-NEXT: fcvtzs x8, s1
-; CHECK-NEXT: fcvtzs x11, s3
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: fcvtzs x10, s2
-; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: mov v0.d[1], x11
-; CHECK-NEXT: mov v1.d[1], x10
+; CHECK-NEXT: fcvtl2 v1.2d, v0.4s
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtzs v1.2d, v1.2d
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
%x = call <4 x i64> @llvm.fptosi.sat.v4f32.v4i64(<4 x float> %f)
ret <4 x i64> %x
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index c94db3484994c..16e04070b6543 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -707,12 +707,8 @@ define <2 x i50> @test_unsigned_v2f32_v2i50(<2 x float> %f) {
define <2 x i64> @test_unsigned_v2f32_v2i64(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzu x8, s0
-; CHECK-NEXT: fcvtzu x9, s1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
; CHECK-NEXT: ret
%x = call <2 x i64> @llvm.fptoui.sat.v2f32.v2i64(<2 x float> %f)
ret <2 x i64> %x
@@ -927,17 +923,10 @@ define <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) {
define <4 x i64> @test_unsigned_v4f32_v4i64(<4 x float> %f) {
; CHECK-LABEL: test_unsigned_v4f32_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: mov s3, v0.s[1]
-; CHECK-NEXT: fcvtzu x9, s0
-; CHECK-NEXT: mov s2, v1.s[1]
-; CHECK-NEXT: fcvtzu x8, s1
-; CHECK-NEXT: fcvtzu x11, s3
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: fcvtzu x10, s2
-; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: mov v0.d[1], x11
-; CHECK-NEXT: mov v1.d[1], x10
+; CHECK-NEXT: fcvtl2 v1.2d, v0.4s
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtzu v1.2d, v1.2d
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
; CHECK-NEXT: ret
%x = call <4 x i64> @llvm.fptoui.sat.v4f32.v4i64(<4 x float> %f)
ret <4 x i64> %x
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
index 9137eae269d91..c77861509e4a1 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
@@ -295,11 +295,8 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
; CHECK-LABEL: llrint_v2i64_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.2s, v0.2s
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
%a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
ret <2 x i64> %a
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
index 9bdbe9b8ac62d..6a97e7ad64bf3 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
@@ -534,11 +534,8 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
; CHECK-i64-LABEL: lrint_v2f32:
; CHECK-i64: // %bb.0:
; CHECK-i64-NEXT: frintx v0.2s, v0.2s
-; CHECK-i64-NEXT: mov s1, v0.s[1]
-; CHECK-i64-NEXT: fcvtzs x8, s0
-; CHECK-i64-NEXT: fcvtzs x9, s1
-; CHECK-i64-NEXT: fmov d0, x8
-; CHECK-i64-NEXT: mov v0.d[1], x9
+; CHECK-i64-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-i64-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-i64-NEXT: ret
%a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x)
ret <2 x iXLen> %a
diff --git a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll
index b7e743b5085f2..5503de2b4c5db 100644
--- a/llvm/test/CodeGen/AArch64/vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll
@@ -387,11 +387,8 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
; CHECK-LABEL: llrint_v2i64_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.2s, v0.2s
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
%a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
ret <2 x i64> %a
@@ -404,16 +401,10 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: frintx v0.2s, v0.2s
; CHECK-NEXT: frintx v1.2s, v1.2s
-; CHECK-NEXT: mov s2, v0.s[1]
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: mov s3, v1.s[1]
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: fcvtzs x10, s2
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fcvtzs x11, s3
-; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: mov v0.d[1], x10
-; CHECK-NEXT: mov v1.d[1], x11
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtl v1.2d, v1.2s
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: fcvtzs v1.2d, v1.2d
; CHECK-NEXT: ret
%a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x)
ret <4 x i64> %a
@@ -429,26 +420,14 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
; CHECK-NEXT: frintx v1.2s, v1.2s
; CHECK-NEXT: frintx v2.2s, v2.2s
; CHECK-NEXT: frintx v3.2s, v3.2s
-; CHECK-NEXT: mov s4, v0.s[1]
-; CHECK-NEXT: mov s5, v1.s[1]
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fcvtzs x10, s1
-; CHECK-NEXT: mov s6, v2.s[1]
-; CHECK-NEXT: mov s7, v3.s[1]
-; CHECK-NEXT: fcvtzs x11, s2
-; CHECK-NEXT: fcvtzs x12, s3
-; CHECK-NEXT: fcvtzs x9, s4
-; CHECK-NEXT: fcvtzs x13, s5
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d2, x10
-; CHECK-NEXT: fcvtzs x14, s6
-; CHECK-NEXT: fcvtzs x15, s7
-; CHECK-NEXT: fmov d1, x11
-; CHECK-NEXT: fmov d3, x12
-; CHECK-NEXT: mov v0.d[1], x9
-; CHECK-NEXT: mov v2.d[1], x13
-; CHECK-NEXT: mov v1.d[1], x14
-; CHECK-NEXT: mov v3.d[1], x15
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtl v1.2d, v1.2s
+; CHECK-NEXT: fcvtl v4.2d, v2.2s
+; CHECK-NEXT: fcvtl v3.2d, v3.2s
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: fcvtzs v2.2d, v1.2d
+; CHECK-NEXT: fcvtzs v1.2d, v4.2d
+; CHECK-NEXT: fcvtzs v3.2d, v3.2d
; CHECK-NEXT: ret
%a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x)
ret <8 x i64> %a
@@ -458,58 +437,34 @@ declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
; CHECK-LABEL: llrint_v16i64_v16f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: frintx v4.2s, v0.2s
-; CHECK-NEXT: frintx v5.2s, v1.2s
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT: ext v5.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: ext v6.16b, v2.16b, v2.16b, #8
; CHECK-NEXT: ext v7.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT: frintx v2.2s, v2.2s
-; CHECK-NEXT: frintx v3.2s, v3.2s
-; CHECK-NEXT: mov s16, v4.s[1]
-; CHECK-NEXT: mov s17, v5.s[1]
-; CHECK-NEXT: fcvtzs x8, s4
; CHECK-NEXT: frintx v0.2s, v0.2s
; CHECK-NEXT: frintx v1.2s, v1.2s
-; CHECK-NEXT: fcvtzs x9, s5
-; CHECK-NEXT: frintx v4.2s, v6.2s
-; CHECK-NEXT: frintx v5.2s, v7.2s
-; CHECK-NEXT: fcvtzs x10, s2
-; CHECK-NEXT: mov s6, v2.s[1]
-; CHECK-NEXT: fcvtzs x13, s3
-; CHECK-NEXT: mov s3, v3.s[1]
-; CHECK-NEXT: fcvtzs x11, s16
-; CHECK-NEXT: fcvtzs x12, s17
-; CHECK-NEXT: mov s7, v0.s[1]
-; CHECK-NEXT: mov s16, v1.s[1]
-; CHECK-NEXT: fcvtzs x15, s1
-; CHECK-NEXT: mov s1, v4.s[1]
-; CHECK-NEXT: mov s17, v5.s[1]
-; CHECK-NEXT: fcvtzs x14, s0
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fcvtzs x8, s4
-; CHECK-NEXT: fmov d4, x10
-; CHECK-NEXT: fcvtzs x10, s5
-; CHECK-NEXT: fmov d2, x9
-; CHECK-NEXT: fcvtzs x9, s6
-; CHECK-NEXT: fmov d6, x13
-; CHECK-NEXT: fcvtzs x13, s7
-; CHECK-NEXT: fcvtzs x16, s16
-; CHECK-NEXT: fcvtzs x17, s3
-; CHECK-NEXT: fcvtzs x18, s1
-; CHECK-NEXT: fcvtzs x0, s17
-; CHECK-NEXT: fmov d1, x14
-; CHECK-NEXT: fmov d3, x15
-; CHECK-NEXT: fmov d5, x8
-; CHECK-NEXT: fmov d7, x10
-; CHECK-NEXT: mov v0.d[1], x11
-; CHECK-NEXT: mov v2.d[1], x12
-; CHECK-NEXT: mov v4.d[1], x9
-; CHECK-NEXT: mov v1.d[1], x13
-; CHECK-NEXT: mov v3.d[1], x16
-; CHECK-NEXT: mov v6.d[1], x17
-; CHECK-NEXT: mov v5.d[1], x18
-; CHECK-NEXT: mov v7.d[1], x0
+; CHECK-NEXT: frintx v2.2s, v2.2s
+; CHECK-NEXT: frintx v3.2s, v3.2s
+; CHECK-NEXT: frintx v5.2s, v5.2s
+; CHECK-NEXT: frintx v4.2s, v4.2s
+; CHECK-NEXT: frintx v6.2s, v6.2s
+; CHECK-NEXT: frintx v7.2s, v7.2s
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtl v1.2d, v1.2s
+; CHECK-NEXT: fcvtl v16.2d, v2.2s
+; CHECK-NEXT: fcvtl v18.2d, v3.2s
+; CHECK-NEXT: fcvtl v5.2d, v5.2s
+; CHECK-NEXT: fcvtl v17.2d, v4.2s
+; CHECK-NEXT: fcvtl v19.2d, v6.2s
+; CHECK-NEXT: fcvtl v7.2d, v7.2s
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: fcvtzs v2.2d, v1.2d
+; CHECK-NEXT: fcvtzs v4.2d, v16.2d
+; CHECK-NEXT: fcvtzs v6.2d, v18.2d
+; CHECK-NEXT: fcvtzs v1.2d, v5.2d
+; CHECK-NEXT: fcvtzs v3.2d, v17.2d
+; CHECK-NEXT: fcvtzs v5.2d, v19.2d
+; CHECK-NEXT: fcvtzs v7.2d, v7.2d
; CHECK-NEXT: ret
%a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x)
ret <16 x i64> %a
@@ -519,118 +474,70 @@ declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) {
; CHECK-LABEL: llrint_v32i64_v32f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v17.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT: ext v18.16b, v4.16b, v4.16b, #8
-; CHECK-NEXT: ext v19.16b, v5.16b, v5.16b, #8
-; CHECK-NEXT: ext v21.16b, v7.16b, v7.16b, #8
-; CHECK-NEXT: ext v16.16b, v2.16b, v2.16b, #8
-; CHECK-NEXT: ext v20.16b, v6.16b, v6.16b, #8
+; CHECK-NEXT: ext v16.16b, v7.16b, v7.16b, #8
+; CHECK-NEXT: ext v17.16b, v6.16b, v6.16b, #8
; CHECK-NEXT: frintx v7.2s, v7.2s
-; CHECK-NEXT: frintx v24.2s, v6.2s
-; CHECK-NEXT: frintx v23.2s, v5.2s
+; CHECK-NEXT: frintx v6.2s, v6.2s
+; CHECK-NEXT: ext v18.16b, v5.16b, v5.16b, #8
+; CHECK-NEXT: ext v21.16b, v4.16b, v4.16b, #8
+; CHECK-NEXT: ext v22.16b, v2.16b, v2.16b, #8
+; CHECK-NEXT: frintx v5.2s, v5.2s
+; CHECK-NEXT: ext v23.16b, v3.16b, v3.16b, #8
; CHECK-NEXT: frintx v4.2s, v4.2s
-; CHECK-NEXT: frintx v3.2s, v3.2s
+; CHECK-NEXT: ext v19.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ext v20.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT: frintx v16.2s, v16.2s
; CHECK-NEXT: frintx v17.2s, v17.2s
+; CHECK-NEXT: fcvtl v7.2d, v7.2s
+; CHECK-NEXT: fcvtl v6.2d, v6.2s
; CHECK-NEXT: frintx v18.2s, v18.2s
-; CHECK-NEXT: frintx v22.2s, v19.2s
; CHECK-NEXT: frintx v21.2s, v21.2s
-; CHECK-NEXT: frintx v16.2s, v16.2s
-; CHECK-NEXT: frintx v20.2s, v20.2s
-; CHECK-NEXT: mov s25, v7.s[1]
-; CHECK-NEXT: fcvtzs x15, s7
-; CHECK-NEXT: frintx v19.2s, v1.2s
-; CHECK-NEXT: fcvtzs x16, s24
-; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: fcvtzs x10, s17
-; CHECK-NEXT: fcvtzs x11, s18
-; CHECK-NEXT: mov s26, v22.s[1]
-; CHECK-NEXT: fcvtzs x12, s22
-; CHECK-NEXT: mov s22, v21.s[1]
-; CHECK-NEXT: fcvtzs x14, s21
-; CHECK-NEXT: mov s21, v24.s[1]
-; CHECK-NEXT: fcvtzs x9, s16
-; CHECK-NEXT: fcvtzs x13, s20
-; CHECK-NEXT: mov s20, v20.s[1]
-; CHECK-NEXT: fmov d24, x15
-; CHECK-NEXT: mov s18, v18.s[1]
-; CHECK-NEXT: fmov d6, x10
-; CHECK-NEXT: fmov d7, x11
-; CHECK-NEXT: fcvtzs x10, s25
-; CHECK-NEXT: fcvtzs x11, s22
-; CHECK-NEXT: fmov d25, x12
-; CHECK-NEXT: frintx v22.2s, v2.2s
-; CHECK-NEXT: fcvtzs x15, s21
-; CHECK-NEXT: fmov d21, x14
-; CHECK-NEXT: fmov d5, x9
-; CHECK-NEXT: fcvtzs x9, s26
-; CHECK-NEXT: fmov d26, x13
-; CHECK-NEXT: fcvtzs x12, s20
-; CHECK-NEXT: fcvtzs x13, s19
-; CHECK-NEXT: mov s20, v23.s[1]
-; CHECK-NEXT: mov v24.d[1], x10
-; CHECK-NEXT: mov v21.d[1], x11
-; CHECK-NEXT: fcvtzs x11, s23
-; CHECK-NEXT: fcvtzs x10, s22
-; CHECK-NEXT: mov s17, v17.s[1]
+; CHECK-NEXT: frintx v2.2s, v2.2s
+; CHECK-NEXT: frintx v3.2s, v3.2s
+; CHECK-NEXT: fcvtl v5.2d, v5.2s
+; CHECK-NEXT: frintx v23.2s, v23.2s
+; CHECK-NEXT: fcvtl v4.2d, v4.2s
; CHECK-NEXT: frintx v1.2s, v1.2s
-; CHECK-NEXT: mov s22, v22.s[1]
-; CHECK-NEXT: mov v26.d[1], x12
-; CHECK-NEXT: fcvtzs x12, s18
-; CHECK-NEXT: mov v25.d[1], x9
-; CHECK-NEXT: fmov d2, x13
-; CHECK-NEXT: fcvtzs x13, s20
-; CHECK-NEXT: fmov d20, x16
-; CHECK-NEXT: stp q24, q21, [x8, #224]
-; CHECK-NEXT: ext v21.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: fmov d18, x11
-; CHECK-NEXT: fcvtzs x11, s4
-; CHECK-NEXT: mov s4, v4.s[1]
-; CHECK-NEXT: fmov d23, x10
-; CHECK-NEXT: mov v20.d[1], x15
-; CHECK-NEXT: fcvtzs x10, s3
-; CHECK-NEXT: mov s3, v3.s[1]
-; CHECK-NEXT: mov v18.d[1], x13
+; CHECK-NEXT: fcvtl v16.2d, v16.2s
+; CHECK-NEXT: fcvtl v17.2d, v17.2s
+; CHECK-NEXT: fcvtzs v7.2d, v7.2d
+; CHECK-NEXT: fcvtzs v6.2d, v6.2d
+; CHECK-NEXT: fcvtl v18.2d, v18.2s
+; CHECK-NEXT: fcvtl v21.2d, v21.2s
+; CHECK-NEXT: frintx v20.2s, v20.2s
+; CHECK-NEXT: fcvtl v3.2d, v3.2s
+; CHECK-NEXT: fcvtzs v5.2d, v5.2d
; CHECK-NEXT: frintx v0.2s, v0.2s
-; CHECK-NEXT: mov s16, v16.s[1]
-; CHECK-NEXT: frintx v21.2s, v21.2s
-; CHECK-NEXT: fcvtzs x13, s17
-; CHECK-NEXT: fcvtzs x14, s22
-; CHECK-NEXT: fcvtzs x9, s4
-; CHECK-NEXT: fmov d4, x11
-; CHECK-NEXT: mov v7.d[1], x12
-; CHECK-NEXT: stp q20, q26, [x8, #192]
-; CHECK-NEXT: fmov d20, x10
-; CHECK-NEXT: fcvtzs x10, s3
-; CHECK-NEXT: stp q18, q25, [x8, #160]
-; CHECK-NEXT: mov s18, v19.s[1]
-; CHECK-NEXT: mov s3, v1.s[1]
-; CHECK-NEXT: mov s17, v0.s[1]
-; CHECK-NEXT: mov s19, v21.s[1]
-; CHECK-NEXT: fcvtzs x11, s21
-; CHECK-NEXT: mov v4.d[1], x9
-; CHECK-NEXT: fcvtzs x9, s16
-; CHECK-NEXT: fcvtzs x12, s1
-; CHECK-NEXT: mov v6.d[1], x13
-; CHECK-NEXT: fcvtzs x13, s0
-; CHECK-NEXT: mov v20.d[1], x10
-; CHECK-NEXT: fcvtzs x15, s18
-; CHECK-NEXT: fcvtzs x10, s3
-; CHECK-NEXT: mov v23.d[1], x14
-; CHECK-NEXT: fcvtzs x14, s17
-; CHECK-NEXT: fmov d3, x11
-; CHECK-NEXT: stp q4, q7, [x8, #128]
-; CHECK-NEXT: mov v5.d[1], x9
-; CHECK-NEXT: fcvtzs x9, s19
-; CHECK-NEXT: stp q20, q6, [x8, #96]
-; CHECK-NEXT: fmov d0, x12
-; CHECK-NEXT: fmov d1, x13
-; CHECK-NEXT: mov v2.d[1], x15
-; CHECK-NEXT: stp q23, q5, [x8, #64]
-; CHECK-NEXT: mov v0.d[1], x10
-; CHECK-NEXT: mov v1.d[1], x14
-; CHECK-NEXT: mov v3.d[1], x9
-; CHECK-NEXT: stp q2, q0, [x8, #32]
-; CHECK-NEXT: stp q1, q3, [x8]
+; CHECK-NEXT: fcvtl v2.2d, v2.2s
+; CHECK-NEXT: fcvtzs v4.2d, v4.2d
+; CHECK-NEXT: fcvtzs v16.2d, v16.2d
+; CHECK-NEXT: fcvtzs v17.2d, v17.2d
+; CHECK-NEXT: fcvtl v1.2d, v1.2s
+; CHECK-NEXT: fcvtzs v3.2d, v3.2d
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtzs v2.2d, v2.2d
+; CHECK-NEXT: stp q6, q17, [x8, #192]
+; CHECK-NEXT: fcvtl v6.2d, v23.2s
+; CHECK-NEXT: frintx v17.2s, v19.2s
+; CHECK-NEXT: stp q7, q16, [x8, #224]
+; CHECK-NEXT: frintx v7.2s, v22.2s
+; CHECK-NEXT: fcvtzs v16.2d, v18.2d
+; CHECK-NEXT: fcvtzs v18.2d, v21.2d
+; CHECK-NEXT: fcvtzs v1.2d, v1.2d
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: fcvtzs v6.2d, v6.2d
+; CHECK-NEXT: stp q5, q16, [x8, #160]
+; CHECK-NEXT: fcvtl v7.2d, v7.2s
+; CHECK-NEXT: fcvtl v5.2d, v20.2s
+; CHECK-NEXT: stp q4, q18, [x8, #128]
+; CHECK-NEXT: fcvtl v4.2d, v17.2s
+; CHECK-NEXT: stp q3, q6, [x8, #96]
+; CHECK-NEXT: fcvtzs v7.2d, v7.2d
+; CHECK-NEXT: fcvtzs v3.2d, v5.2d
+; CHECK-NEXT: stp q1, q3, [x8, #32]
+; CHECK-NEXT: stp q2, q7, [x8, #64]
+; CHECK-NEXT: fcvtzs v2.2d, v4.2d
+; CHECK-NEXT: stp q0, q2, [x8]
; CHECK-NEXT: ret
%a = call <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float> %x)
ret <32 x i64> %a
diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll
index 44f29f1420fe2..602643264e7be 100644
--- a/llvm/test/CodeGen/AArch64/vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll
@@ -784,11 +784,8 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
; CHECK-i64-LABEL: lrint_v2f32:
; CHECK-i64: // %bb.0:
; CHECK-i64-NEXT: frintx v0.2s, v0.2s
-; CHECK-i64-NEXT: mov s1, v0.s[1]
-; CHECK-i64-NEXT: fcvtzs x8, s0
-; CHECK-i64-NEXT: fcvtzs x9, s1
-; CHECK-i64-NEXT: fmov d0, x8
-; CHECK-i64-NEXT: mov v0.d[1], x9
+; CHECK-i64-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-i64-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-i64-NEXT: ret
%a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x)
ret <2 x iXLen> %a
@@ -807,16 +804,10 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
; CHECK-i64-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-i64-NEXT: frintx v0.2s, v0.2s
; CHECK-i64-NEXT: frintx v1.2s, v1.2s
-; CHECK-i64-NEXT: mov s2, v0.s[1]
-; CHECK-i64-NEXT: fcvtzs x8, s0
-; CHECK-i64-NEXT: mov s3, v1.s[1]
-; CHECK-i64-NEXT: fcvtzs x9, s1
-; CHECK-i64-NEXT: fcvtzs x10, s2
-; CHECK-i64-NEXT: fmov d0, x8
-; CHECK-i64-NEXT: fcvtzs x11, s3
-; CHECK-i64-NEXT: fmov d1, x9
-; CHECK-i64-NEXT: mov v0.d[1], x10
-; CHECK-i64-NEXT: mov v1.d[1], x11
+; CHECK-i64-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-i64-NEXT: fcvtl v1.2d, v1.2s
+; CHECK-i64-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-i64-NEXT: fcvtzs v1.2d, v1.2d
; CHECK-i64-NEXT: ret
%a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x)
ret <4 x iXLen> %a
@@ -840,26 +831,14 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
; CHECK-i64-NEXT: frintx v1.2s, v1.2s
; CHECK-i64-NEXT: frintx v2.2s, v2.2s
; CHECK-i64-NEXT: frintx v3.2s, v3.2s
-; CHECK-i64-NEXT: mov s4, v0.s[1]
-; CHECK-i64-NEXT: mov s5, v1.s[1]
-; CHECK-i64-NEXT: fcvtzs x8, s0
-; CHECK-i64-NEXT: fcvtzs x10, s1
-; CHECK-i64-NEXT: mov s6, v2.s[1]
-; CHECK-i64-NEXT: mov s7, v3.s[1]
-; CHECK-i64-NEXT: fcvtzs x11, s2
-; CHECK-i64-NEXT: fcvtzs x12, s3
-; CHECK-i64-NEXT: fcvtzs x9, s4
-; CHECK-i64-NEXT: fcvtzs x13, s5
-; CHECK-i64-NEXT: fmov d0, x8
-; CHECK-i64-NEXT: fmov d2, x10
-; CHECK-i64-NEXT: fcvtzs x14, s6
-; CHECK-i64-NEXT: fcvtzs x15, s7
-; CHECK-i64-NEXT: fmov d1, x11
-; CHECK-i64-NEXT: fmov d3, x12
-; CHECK-i64-NEXT: mov v0.d[1], x9
-; CHECK-i64-NEXT: mov v2.d[1], x13
-; CHECK-i64-NEXT: mov v1.d[1], x14
-; CHECK-i64-NEXT: mov v3.d[1], x15
+; CHECK-i64-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-i64-NEXT: fcvtl v1.2d, v1.2s
+; CHECK-i64-NEXT: fcvtl v4.2d, v2.2s
+; CHECK-i64-NEXT: fcvtl v3.2d, v3.2s
+; CHECK-i64-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-i64-NEXT: fcvtzs v2.2d, v1.2d
+; CHECK-i64-NEXT: fcvtzs v1.2d, v4.2d
+; CHECK-i64-NEXT: fcvtzs v3.2d, v3.2d
; CHECK-i64-NEXT: ret
%a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x)
ret <8 x iXLen> %a
@@ -881,58 +860,34 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
;
; CHECK-i64-LABEL: lrint_v16f32:
; CHECK-i64: // %bb.0:
-; CHECK-i64-NEXT: frintx v4.2s, v0.2s
-; CHECK-i64-NEXT: frintx v5.2s, v1.2s
-; CHECK-i64-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-i64-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-i64-NEXT: ext v4.16b, v1.16b, v1.16b, #8
+; CHECK-i64-NEXT: ext v5.16b, v0.16b, v0.16b, #8
; CHECK-i64-NEXT: ext v6.16b, v2.16b, v2.16b, #8
; CHECK-i64-NEXT: ext v7.16b, v3.16b, v3.16b, #8
-; CHECK-i64-NEXT: frintx v2.2s, v2.2s
-; CHECK-i64-NEXT: frintx v3.2s, v3.2s
-; CHECK-i64-NEXT: mov s16, v4.s[1]
-; CHECK-i64-NEXT: mov s17, v5.s[1]
-; CHECK-i64-NEXT: fcvtzs x8, s4
; CHECK-i64-NEXT: frintx v0.2s, v0.2s
; CHECK-i64-NEXT: frintx v1.2s, v1.2s
-; CHECK-i64-NEXT: fcvtzs x9, s5
-; CHECK-i64-NEXT: frintx v4.2s, v6.2s
-; CHECK-i64-NEXT: frintx v5.2s, v7.2s
-; CHECK-i64-NEXT: fcvtzs x10, s2
-; CHECK-i64-NEXT: mov s6, v2.s[1]
-; CHECK-i64-NEXT: fcvtzs x13, s3
-; CHECK-i64-NEXT: mov s3, v3.s[1]
-; CHECK-i64-NEXT: fcvtzs x11, s16
-; CHECK-i64-NEXT: fcvtzs x12, s17
-; CHECK-i64-NEXT: mov s7, v0.s[1]
-; CHECK-i64-NEXT: mov s16, v1.s[1]
-; CHECK-i64-NEXT: fcvtzs x15, s1
-; CHECK-i64-NEXT: mov s1, v4.s[1]
-; CHECK-i64-NEXT: mov s17, v5.s[1]
-; CHECK-i64-NEXT: fcvtzs x14, s0
-; CHECK-i64-NEXT: fmov d0, x8
-; CHECK-i64-NEXT: fcvtzs x8, s4
-; CHECK-i64-NEXT: fmov d4, x10
-; CHECK-i64-NEXT: fcvtzs x10, s5
-; CHECK-i64-NEXT: fmov d2, x9
-; CHECK-i64-NEXT: fcvtzs x9, s6
-; CHECK-i64-NEXT: fmov d6, x13
-; CHECK-i64-NEXT: fcvtzs x13, s7
-; CHECK-i64-NEXT: fcvtzs x16, s16
-; CHECK-i64-NEXT: fcvtzs x17, s3
-; CHECK-i64-NEXT: fcvtzs x18, s1
-; CHECK-i64-NEXT: fcvtzs x0, s17
-; CHECK-i64-NEXT: fmov d1, x14
-; CHECK-i64-NEXT: fmov d3, x15
-; CHECK-i64-NEXT: fmov d5, x8
-; CHECK-i64-NEXT: fmov d7, x10
-; CHECK-i64-NEXT: mov v0.d[1], x11
-; CHECK-i64-NEXT: mov v2.d[1], x12
-; CHECK-i64-NEXT: mov v4.d[1], x9
-; CHECK-i64-NEXT: mov v1.d[1], x13
-; CHECK-i64-NEXT: mov v3.d[1], x16
-; CHECK-i64-NEXT: mov v6.d[1], x17
-; CHECK-i64-NEXT: mov v5.d[1], x18
-; CHECK-i64-NEXT: mov v7.d[1], x0
+; CHECK-i64-NEXT: frintx v2.2s, v2.2s
+; CHECK-i64-NEXT: frintx v3.2s, v3.2s
+; CHECK-i64-NEXT: frintx v5.2s, v5.2s
+; CHECK-i64-NEXT: frintx v4.2s, v4.2s
+; CHECK-i64-NEXT: frintx v6.2s, v6.2s
+; CHECK-i64-NEXT: frintx v7.2s, v7.2s
+; CHECK-i64-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-i64-NEXT: fcvtl v1.2d, v1.2s
+; CHECK-i64-NEXT: fcvtl v16.2d, v2.2s
+; CHECK-i64-NEXT: fcvtl v18.2d, v3.2s
+; CHECK-i64-NEXT: fcvtl v5.2d, v5.2s
+; CHECK-i64-NEXT: fcvtl v17.2d, v4.2s
+; CHECK-i64-NEXT: fcvtl v19.2d, v6.2s
+; CHECK-i64-NEXT: fcvtl v7.2d, v7.2s
+; CHECK-i64-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-i64-NEXT: fcvtzs v2.2d, v1.2d
+; CHECK-i64-NEXT: fcvtzs v4.2d, v16.2d
+; CHECK-i64-NEXT: fcvtzs v6.2d, v18.2d
+; CHECK-i64-NEXT: fcvtzs v1.2d, v5.2d
+; CHECK-i64-NEXT: fcvtzs v3.2d, v17.2d
+; CHECK-i64-NEXT: fcvtzs v5.2d, v19.2d
+; CHECK-i64-NEXT: fcvtzs v7.2d, v7.2d
; CHECK-i64-NEXT: ret
%a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x)
ret <16 x iXLen> %a
@@ -962,118 +917,70 @@ define <32 x iXLen> @lrint_v32f32(<32 x float> %x) {
;
; CHECK-i64-LABEL: lrint_v32f32:
; CHECK-i64: // %bb.0:
-; CHECK-i64-NEXT: ext v17.16b, v3.16b, v3.16b, #8
-; CHECK-i64-NEXT: ext v18.16b, v4.16b, v4.16b, #8
-; CHECK-i64-NEXT: ext v19.16b, v5.16b, v5.16b, #8
-; CHECK-i64-NEXT: ext v21.16b, v7.16b, v7.16b, #8
-; CHECK-i64-NEXT: ext v16.16b, v2.16b, v2.16b, #8
-; CHECK-i64-NEXT: ext v20.16b, v6.16b, v6.16b, #8
+; CHECK-i64-NEXT: ext v16.16b, v7.16b, v7.16b, #8
+; CHECK-i64-NEXT: ext v17.16b, v6.16b, v6.16b, #8
; CHECK-i64-NEXT: frintx v7.2s, v7.2s
-; CHECK-i64-NEXT: frintx v24.2s, v6.2s
-; CHECK-i64-NEXT: frintx v23.2s, v5.2s
+; CHECK-i64-NEXT: frintx v6.2s, v6.2s
+; CHECK-i64-NEXT: ext v18.16b, v5.16b, v5.16b, #8
+; CHECK-i64-NEXT: ext v21.16b, v4.16b, v4.16b, #8
+; CHECK-i64-NEXT: ext v22.16b, v2.16b, v2.16b, #8
+; CHECK-i64-NEXT: frintx v5.2s, v5.2s
+; CHECK-i64-NEXT: ext v23.16b, v3.16b, v3.16b, #8
; CHECK-i64-NEXT: frintx v4.2s, v4.2s
-; CHECK-i64-NEXT: frintx v3.2s, v3.2s
+; CHECK-i64-NEXT: ext v19.16b, v0.16b, v0.16b, #8
+; CHECK-i64-NEXT: ext v20.16b, v1.16b, v1.16b, #8
+; CHECK-i64-NEXT: frintx v16.2s, v16.2s
; CHECK-i64-NEXT: frintx v17.2s, v17.2s
+; CHECK-i64-NEXT: fcvtl v7.2d, v7.2s
+; CHECK-i64-NEXT: fcvtl v6.2d, v6.2s
; CHECK-i64-NEXT: frintx v18.2s, v18.2s
-; CHECK-i64-NEXT: frintx v22.2s, v19.2s
; CHECK-i64-NEXT: frintx v21.2s, v21.2s
-; CHECK-i64-NEXT: frintx v16.2s, v16.2s
-; CHECK-i64-NEXT: frintx v20.2s, v20.2s
-; CHECK-i64-NEXT: mov s25, v7.s[1]
-; CHECK-i64-NEXT: fcvtzs x15, s7
-; CHECK-i64-NEXT: frintx v19.2s, v1.2s
-; CHECK-i64-NEXT: fcvtzs x16, s24
-; CHECK-i64-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-i64-NEXT: fcvtzs x10, s17
-; CHECK-i64-NEXT: fcvtzs x11, s18
-; CHECK-i64-NEXT: mov s26, v22.s[1]
-; CHECK-i64-NEXT: fcvtzs x12, s22
-; CHECK-i64-NEXT: mov s22, v21.s[1]
-; CHECK-i64-NEXT: fcvtzs x14, s21
-; CHECK-i64-NEXT: mov s21, v24.s[1]
-; CHECK-i64-NEXT: fcvtzs x9, s16
-; CHECK-i64-NEXT: fcvtzs x13, s20
-; CHECK-i64-NEXT: mov s20, v20.s[1]
-; CHECK-i64-NEXT: fmov d24, x15
-; CHECK-i64-NEXT: mov s18, v18.s[1]
-; CHECK-i64-NEXT: fmov d6, x10
-; CHECK-i64-NEXT: fmov d7, x11
-; CHECK-i64-NEXT: fcvtzs x10, s25
-; CHECK-i64-NEXT: fcvtzs x11, s22
-; CHECK-i64-NEXT: fmov d25, x12
-; CHECK-i64-NEXT: frintx v22.2s, v2.2s
-; CHECK-i64-NEXT: fcvtzs x15, s21
-; CHECK-i64-NEXT: fmov d21, x14
-; CHECK-i64-NEXT: fmov d5, x9
-; CHECK-i64-NEXT: fcvtzs x9, s26
-; CHECK-i64-NEXT: fmov d26, x13
-; CHECK-i64-NEXT: fcvtzs x12, s20
-; CHECK-i64-NEXT: fcvtzs x13, s19
-; CHECK-i64-NEXT: mov s20, v23.s[1]
-; CHECK-i64-NEXT: mov v24.d[1], x10
-; CHECK-i64-NEXT: mov v21.d[1], x11
-; CHECK-i64-NEXT: fcvtzs x11, s23
-; CHECK-i64-NEXT: fcvtzs x10, s22
-; CHECK-i64-NEXT: mov s17, v17.s[1]
+; CHECK-i64-NEXT: frintx v2.2s, v2.2s
+; CHECK-i64-NEXT: frintx v3.2s, v3.2s
+; CHECK-i64-NEXT: fcvtl v5.2d, v5.2s
+; CHECK-i64-NEXT: frintx v23.2s, v23.2s
+; CHECK-i64-NEXT: fcvtl v4.2d, v4.2s
; CHECK-i64-NEXT: frintx v1.2s, v1.2s
-; CHECK-i64-NEXT: mov s22, v22.s[1]
-; CHECK-i64-NEXT: mov v26.d[1], x12
-; CHECK-i64-NEXT: fcvtzs x12, s18
-; CHECK-i64-NEXT: mov v25.d[1], x9
-; CHECK-i64-NEXT: fmov d2, x13
-; CHECK-i64-NEXT: fcvtzs x13, s20
-; CHECK-i64-NEXT: fmov d20, x16
-; CHECK-i64-NEXT: stp q24, q21, [x8, #224]
-; CHECK-i64-NEXT: ext v21.16b, v0.16b, v0.16b, #8
-; CHECK-i64-NEXT: fmov d18, x11
-; CHECK-i64-NEXT: fcvtzs x11, s4
-; CHECK-i64-NEXT: mov s4, v4.s[1]
-; CHECK-i64-NEXT: fmov d23, x10
-; CHECK-i64-NEXT: mov v20.d[1], x15
-; CHECK-i64-NEXT: fcvtzs x10, s3
-; CHECK-i64-NEXT: mov s3, v3.s[1]
-; CHECK-i64-NEXT: mov v18.d[1], x13
+; CHECK-i64-NEXT: fcvtl v16.2d, v16.2s
+; CHECK-i64-NEXT: fcvtl v17.2d, v17.2s
+; CHECK-i64-NEXT: fcvtzs v7.2d, v7.2d
+; CHECK-i64-NEXT: fcvtzs v6.2d, v6.2d
+; CHECK-i64-NEXT: fcvtl v18.2d, v18.2s
+; CHECK-i64-NEXT: fcvtl v21.2d, v21.2s
+; CHECK-i64-NEXT: frintx v20.2s, v20.2s
+; CHECK-i64-NEXT: fcvtl v3.2d, v3.2s
+; CHECK-i64-NEXT: fcvtzs v5.2d, v5.2d
; CHECK-i64-NEXT: frintx v0.2s, v0.2s
-; CHECK-i64-NEXT: mov s16, v16.s[1]
-; CHECK-i64-NEXT: frintx v21.2s, v21.2s
-; CHECK-i64-NEXT: fcvtzs x13, s17
-; CHECK-i64-NEXT: fcvtzs x14, s22
-; CHECK-i64-NEXT: fcvtzs x9, s4
-; CHECK-i64-NEXT: fmov d4, x11
-; CHECK-i64-NEXT: mov v7.d[1], x12
-; CHECK-i64-NEXT: stp q20, q26, [x8, #192]
-; CHECK-i64-NEXT: fmov d20, x10
-; CHECK-i64-NEXT: fcvtzs x10, s3
-; CHECK-i64-NEXT: stp q18, q25, [x8, #160]
-; CHECK-i64-NEXT: mov s18, v19.s[1]
-; CHECK-i64-NEXT: mov s3, v1.s[1]
-; CHECK-i64-NEXT: mov s17, v0.s[1]
-; CHECK-i64-NEXT: mov s19, v21.s[1]
-; CHECK-i64-NEXT: fcvtzs x11, s21
-; CHECK-i64-NEXT: mov v4.d[1], x9
-; CHECK-i64-NEXT: fcvtzs x9, s16
-; CHECK-i64-NEXT: fcvtzs x12, s1
-; CHECK-i64-NEXT: mov v6.d[1], x13
-; CHECK-i64-NEXT: fcvtzs x13, s0
-; CHECK-i64-NEXT: mov v20.d[1], x10
-; CHECK-i64-NEXT: fcvtzs x15, s18
-; CHECK-i64-NEXT: fcvtzs x10, s3
-; CHECK-i64-NEXT: mov v23.d[1], x14
-; CHECK-i64-NEXT: fcvtzs x14, s17
-; CHECK-i64-NEXT: fmov d3, x11
-; CHECK-i64-NEXT: stp q4, q7, [x8, #128]
-; CHECK-i64-NEXT: mov v5.d[1], x9
-; CHECK-i64-NEXT: fcvtzs x9, s19
-; CHECK-i64-NEXT: stp q20, q6, [x8, #96]
-; CHECK-i64-NEXT: fmov d0, x12
-; CHECK-i64-NEXT: fmov d1, x13
-; CHECK-i64-NEXT: mov v2.d[1], x15
-; CHECK-i64-NEXT: stp q23, q5, [x8, #64]
-; CHECK-i64-NEXT: mov v0.d[1], x10
-; CHECK-i64-NEXT: mov v1.d[1], x14
-; CHECK-i64-NEXT: mov v3.d[1], x9
-; CHECK-i64-NEXT: stp q2, q0, [x8, #32]
-; CHECK-i64-NEXT: stp q1, q3, [x8]
+; CHECK-i64-NEXT: fcvtl v2.2d, v2.2s
+; CHECK-i64-NEXT: fcvtzs v4.2d, v4.2d
+; CHECK-i64-NEXT: fcvtzs v16.2d, v16.2d
+; CHECK-i64-NEXT: fcvtzs v17.2d, v17.2d
+; CHECK-i64-NEXT: fcvtl v1.2d, v1.2s
+; CHECK-i64-NEXT: fcvtzs v3.2d, v3.2d
+; CHECK-i64-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-i64-NEXT: fcvtzs v2.2d, v2.2d
+; CHECK-i64-NEXT: stp q6, q17, [x8, #192]
+; CHECK-i64-NEXT: fcvtl v6.2d, v23.2s
+; CHECK-i64-NEXT: frintx v17.2s, v19.2s
+; CHECK-i64-NEXT: stp q7, q16, [x8, #224]
+; CHECK-i64-NEXT: frintx v7.2s, v22.2s
+; CHECK-i64-NEXT: fcvtzs v16.2d, v18.2d
+; CHECK-i64-NEXT: fcvtzs v18.2d, v21.2d
+; CHECK-i64-NEXT: fcvtzs v1.2d, v1.2d
+; CHECK-i64-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-i64-NEXT: fcvtzs v6.2d, v6.2d
+; CHECK-i64-NEXT: stp q5, q16, [x8, #160]
+; CHECK-i64-NEXT: fcvtl v7.2d, v7.2s
+; CHECK-i64-NEXT: fcvtl v5.2d, v20.2s
+; CHECK-i64-NEXT: stp q4, q18, [x8, #128]
+; CHECK-i64-NEXT: fcvtl v4.2d, v17.2s
+; CHECK-i64-NEXT: stp q3, q6, [x8, #96]
+; CHECK-i64-NEXT: fcvtzs v7.2d, v7.2d
+; CHECK-i64-NEXT: fcvtzs v3.2d, v5.2d
+; CHECK-i64-NEXT: stp q1, q3, [x8, #32]
+; CHECK-i64-NEXT: stp q2, q7, [x8, #64]
+; CHECK-i64-NEXT: fcvtzs v2.2d, v4.2d
+; CHECK-i64-NEXT: stp q0, q2, [x8]
; CHECK-i64-NEXT: ret
%a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float> %x)
ret <32 x iXLen> %a