[llvm] [AArch64] Extend v2i64 fptosi.sat to v2f64 (PR #91714)

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon May 13 00:33:00 PDT 2024


https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/91714

From e75687aa56725c092765a5960e588a4089298ecf Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 13 May 2024 08:32:44 +0100
Subject: [PATCH] [AArch64] Extend v2i64 fptosi.sat to v2f64

This lets the saturating conversion be emitted as a single fcvtzs/fcvtzu on
the widened lanes (after an fcvtl), as opposed to having to scalarize it.
Extending from f32 to f64 is exact, so the saturating result is unchanged.
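
For example, for a v2f32 -> v2i64 case like the ones covered by the tests in
this patch (a minimal standalone sketch; the function name @f is just for
illustration):

  declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float>)

  define <2 x i64> @f(<2 x float> %x) {
    %r = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %x)
    ret <2 x i64> %r
  }

now compiles to

  fcvtl   v0.2d, v0.2s
  fcvtzs  v0.2d, v0.2d
  ret

rather than the previous scalarized mov/fcvtzs/fcvtzs/fmov/mov sequence.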
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |   9 +
 llvm/test/CodeGen/AArch64/fcvt_combine.ll     |   7 +-
 llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll |  16 +-
 .../test/CodeGen/AArch64/fptosi-sat-vector.ll |  23 +-
 .../test/CodeGen/AArch64/fptoui-sat-vector.ll |  23 +-
 .../AArch64/sve-fixed-vector-llrint.ll        |   7 +-
 .../CodeGen/AArch64/sve-fixed-vector-lrint.ll |   7 +-
 llvm/test/CodeGen/AArch64/vector-llrint.ll    | 283 ++++++------------
 llvm/test/CodeGen/AArch64/vector-lrint.ll     | 283 ++++++------------
 9 files changed, 221 insertions(+), 437 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a6c020c6b823d..1e0071fffe666 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4286,6 +4286,15 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
     return SDValue();
 
   SDLoc DL(Op);
+  // Expand to f64 if we are saturating to i64, to help keep the lanes the
+  // same width and produce a single fcvtzs/fcvtzu.
+  if (SatWidth == 64 && SrcElementWidth < 64) {
+    MVT F64VT = MVT::getVectorVT(MVT::f64, SrcVT.getVectorNumElements());
+    SrcVal = DAG.getNode(ISD::FP_EXTEND, DL, F64VT, SrcVal);
+    SrcVT = F64VT;
+    SrcElementVT = MVT::f64;
+    SrcElementWidth = 64;
+  }
   // Cases that we can emit directly.
   if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
     return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
diff --git a/llvm/test/CodeGen/AArch64/fcvt_combine.ll b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
index 29170aab96566..62669a6d99eae 100644
--- a/llvm/test/CodeGen/AArch64/fcvt_combine.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
@@ -345,11 +345,8 @@ define <2 x i64> @test6_sat(<2 x float> %f) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov v1.2s, #16.00000000
 ; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    mov s1, v0.s[1]
-; CHECK-NEXT:    fcvtzs x8, s0
-; CHECK-NEXT:    fcvtzs x9, s1
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov v0.d[1], x9
+; CHECK-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-NEXT:    ret
   %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
   %vcvt.i = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %mul.i)
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
index 2ea581359af6f..4e8bfcd9d7516 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -436,12 +436,8 @@ entry:
 define <2 x i64> @stest_f32i64(<2 x float> %x) {
 ; CHECK-LABEL: stest_f32i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s1, v0.s[1]
-; CHECK-NEXT:    fcvtzs x8, s0
-; CHECK-NEXT:    fcvtzs x9, s1
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov v0.d[1], x9
+; CHECK-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptosi <2 x float> %x to <2 x i128>
@@ -1056,12 +1052,8 @@ entry:
 define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
 ; CHECK-LABEL: stest_f32i64_mm:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s1, v0.s[1]
-; CHECK-NEXT:    fcvtzs x8, s0
-; CHECK-NEXT:    fcvtzs x9, s1
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov v0.d[1], x9
+; CHECK-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptosi <2 x float> %x to <2 x i128>
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index c45885a38f159..d620a8851ee44 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -793,12 +793,8 @@ define <2 x i50> @test_signed_v2f32_v2i50(<2 x float> %f) {
 define <2 x i64> @test_signed_v2f32_v2i64(<2 x float> %f) {
 ; CHECK-LABEL: test_signed_v2f32_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s1, v0.s[1]
-; CHECK-NEXT:    fcvtzs x8, s0
-; CHECK-NEXT:    fcvtzs x9, s1
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov v0.d[1], x9
+; CHECK-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-NEXT:    ret
     %x = call <2 x i64> @llvm.fptosi.sat.v2f32.v2i64(<2 x float> %f)
     ret <2 x i64> %x
@@ -1060,17 +1056,10 @@ define <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) {
 define <4 x i64> @test_signed_v4f32_v4i64(<4 x float> %f) {
 ; CHECK-LABEL: test_signed_v4f32_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    mov s3, v0.s[1]
-; CHECK-NEXT:    fcvtzs x9, s0
-; CHECK-NEXT:    mov s2, v1.s[1]
-; CHECK-NEXT:    fcvtzs x8, s1
-; CHECK-NEXT:    fcvtzs x11, s3
-; CHECK-NEXT:    fmov d0, x9
-; CHECK-NEXT:    fcvtzs x10, s2
-; CHECK-NEXT:    fmov d1, x8
-; CHECK-NEXT:    mov v0.d[1], x11
-; CHECK-NEXT:    mov v1.d[1], x10
+; CHECK-NEXT:    fcvtl2 v1.2d, v0.4s
+; CHECK-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-NEXT:    ret
     %x = call <4 x i64> @llvm.fptosi.sat.v4f32.v4i64(<4 x float> %f)
     ret <4 x i64> %x
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index c94db3484994c..16e04070b6543 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -707,12 +707,8 @@ define <2 x i50> @test_unsigned_v2f32_v2i50(<2 x float> %f) {
 define <2 x i64> @test_unsigned_v2f32_v2i64(<2 x float> %f) {
 ; CHECK-LABEL: test_unsigned_v2f32_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s1, v0.s[1]
-; CHECK-NEXT:    fcvtzu x8, s0
-; CHECK-NEXT:    fcvtzu x9, s1
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov v0.d[1], x9
+; CHECK-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
 ; CHECK-NEXT:    ret
     %x = call <2 x i64> @llvm.fptoui.sat.v2f32.v2i64(<2 x float> %f)
     ret <2 x i64> %x
@@ -927,17 +923,10 @@ define <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) {
 define <4 x i64> @test_unsigned_v4f32_v4i64(<4 x float> %f) {
 ; CHECK-LABEL: test_unsigned_v4f32_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    mov s3, v0.s[1]
-; CHECK-NEXT:    fcvtzu x9, s0
-; CHECK-NEXT:    mov s2, v1.s[1]
-; CHECK-NEXT:    fcvtzu x8, s1
-; CHECK-NEXT:    fcvtzu x11, s3
-; CHECK-NEXT:    fmov d0, x9
-; CHECK-NEXT:    fcvtzu x10, s2
-; CHECK-NEXT:    fmov d1, x8
-; CHECK-NEXT:    mov v0.d[1], x11
-; CHECK-NEXT:    mov v1.d[1], x10
+; CHECK-NEXT:    fcvtl2 v1.2d, v0.4s
+; CHECK-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
 ; CHECK-NEXT:    ret
     %x = call <4 x i64> @llvm.fptoui.sat.v4f32.v4i64(<4 x float> %f)
     ret <4 x i64> %x
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
index 9137eae269d91..c77861509e4a1 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
@@ -295,11 +295,8 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
 ; CHECK-LABEL: llrint_v2i64_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    frintx v0.2s, v0.2s
-; CHECK-NEXT:    mov s1, v0.s[1]
-; CHECK-NEXT:    fcvtzs x8, s0
-; CHECK-NEXT:    fcvtzs x9, s1
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov v0.d[1], x9
+; CHECK-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-NEXT:    ret
   %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
   ret <2 x i64> %a
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
index 9bdbe9b8ac62d..6a97e7ad64bf3 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
@@ -534,11 +534,8 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
 ; CHECK-i64-LABEL: lrint_v2f32:
 ; CHECK-i64:       // %bb.0:
 ; CHECK-i64-NEXT:    frintx v0.2s, v0.2s
-; CHECK-i64-NEXT:    mov s1, v0.s[1]
-; CHECK-i64-NEXT:    fcvtzs x8, s0
-; CHECK-i64-NEXT:    fcvtzs x9, s1
-; CHECK-i64-NEXT:    fmov d0, x8
-; CHECK-i64-NEXT:    mov v0.d[1], x9
+; CHECK-i64-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-i64-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-i64-NEXT:    ret
   %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x)
   ret <2 x iXLen> %a
diff --git a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll
index b7e743b5085f2..5503de2b4c5db 100644
--- a/llvm/test/CodeGen/AArch64/vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll
@@ -387,11 +387,8 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
 ; CHECK-LABEL: llrint_v2i64_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    frintx v0.2s, v0.2s
-; CHECK-NEXT:    mov s1, v0.s[1]
-; CHECK-NEXT:    fcvtzs x8, s0
-; CHECK-NEXT:    fcvtzs x9, s1
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov v0.d[1], x9
+; CHECK-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-NEXT:    ret
   %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
   ret <2 x i64> %a
@@ -404,16 +401,10 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
 ; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
 ; CHECK-NEXT:    frintx v0.2s, v0.2s
 ; CHECK-NEXT:    frintx v1.2s, v1.2s
-; CHECK-NEXT:    mov s2, v0.s[1]
-; CHECK-NEXT:    fcvtzs x8, s0
-; CHECK-NEXT:    mov s3, v1.s[1]
-; CHECK-NEXT:    fcvtzs x9, s1
-; CHECK-NEXT:    fcvtzs x10, s2
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    fcvtzs x11, s3
-; CHECK-NEXT:    fmov d1, x9
-; CHECK-NEXT:    mov v0.d[1], x10
-; CHECK-NEXT:    mov v1.d[1], x11
+; CHECK-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-NEXT:    fcvtl v1.2d, v1.2s
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
 ; CHECK-NEXT:    ret
   %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x)
   ret <4 x i64> %a
@@ -429,26 +420,14 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
 ; CHECK-NEXT:    frintx v1.2s, v1.2s
 ; CHECK-NEXT:    frintx v2.2s, v2.2s
 ; CHECK-NEXT:    frintx v3.2s, v3.2s
-; CHECK-NEXT:    mov s4, v0.s[1]
-; CHECK-NEXT:    mov s5, v1.s[1]
-; CHECK-NEXT:    fcvtzs x8, s0
-; CHECK-NEXT:    fcvtzs x10, s1
-; CHECK-NEXT:    mov s6, v2.s[1]
-; CHECK-NEXT:    mov s7, v3.s[1]
-; CHECK-NEXT:    fcvtzs x11, s2
-; CHECK-NEXT:    fcvtzs x12, s3
-; CHECK-NEXT:    fcvtzs x9, s4
-; CHECK-NEXT:    fcvtzs x13, s5
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    fmov d2, x10
-; CHECK-NEXT:    fcvtzs x14, s6
-; CHECK-NEXT:    fcvtzs x15, s7
-; CHECK-NEXT:    fmov d1, x11
-; CHECK-NEXT:    fmov d3, x12
-; CHECK-NEXT:    mov v0.d[1], x9
-; CHECK-NEXT:    mov v2.d[1], x13
-; CHECK-NEXT:    mov v1.d[1], x14
-; CHECK-NEXT:    mov v3.d[1], x15
+; CHECK-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-NEXT:    fcvtl v1.2d, v1.2s
+; CHECK-NEXT:    fcvtl v4.2d, v2.2s
+; CHECK-NEXT:    fcvtl v3.2d, v3.2s
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzs v2.2d, v1.2d
+; CHECK-NEXT:    fcvtzs v1.2d, v4.2d
+; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
 ; CHECK-NEXT:    ret
   %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x)
   ret <8 x i64> %a
@@ -458,58 +437,34 @@ declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
 define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
 ; CHECK-LABEL: llrint_v16i64_v16f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    frintx v4.2s, v0.2s
-; CHECK-NEXT:    frintx v5.2s, v1.2s
-; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v5.16b, v0.16b, v0.16b, #8
 ; CHECK-NEXT:    ext v6.16b, v2.16b, v2.16b, #8
 ; CHECK-NEXT:    ext v7.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT:    frintx v2.2s, v2.2s
-; CHECK-NEXT:    frintx v3.2s, v3.2s
-; CHECK-NEXT:    mov s16, v4.s[1]
-; CHECK-NEXT:    mov s17, v5.s[1]
-; CHECK-NEXT:    fcvtzs x8, s4
 ; CHECK-NEXT:    frintx v0.2s, v0.2s
 ; CHECK-NEXT:    frintx v1.2s, v1.2s
-; CHECK-NEXT:    fcvtzs x9, s5
-; CHECK-NEXT:    frintx v4.2s, v6.2s
-; CHECK-NEXT:    frintx v5.2s, v7.2s
-; CHECK-NEXT:    fcvtzs x10, s2
-; CHECK-NEXT:    mov s6, v2.s[1]
-; CHECK-NEXT:    fcvtzs x13, s3
-; CHECK-NEXT:    mov s3, v3.s[1]
-; CHECK-NEXT:    fcvtzs x11, s16
-; CHECK-NEXT:    fcvtzs x12, s17
-; CHECK-NEXT:    mov s7, v0.s[1]
-; CHECK-NEXT:    mov s16, v1.s[1]
-; CHECK-NEXT:    fcvtzs x15, s1
-; CHECK-NEXT:    mov s1, v4.s[1]
-; CHECK-NEXT:    mov s17, v5.s[1]
-; CHECK-NEXT:    fcvtzs x14, s0
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    fcvtzs x8, s4
-; CHECK-NEXT:    fmov d4, x10
-; CHECK-NEXT:    fcvtzs x10, s5
-; CHECK-NEXT:    fmov d2, x9
-; CHECK-NEXT:    fcvtzs x9, s6
-; CHECK-NEXT:    fmov d6, x13
-; CHECK-NEXT:    fcvtzs x13, s7
-; CHECK-NEXT:    fcvtzs x16, s16
-; CHECK-NEXT:    fcvtzs x17, s3
-; CHECK-NEXT:    fcvtzs x18, s1
-; CHECK-NEXT:    fcvtzs x0, s17
-; CHECK-NEXT:    fmov d1, x14
-; CHECK-NEXT:    fmov d3, x15
-; CHECK-NEXT:    fmov d5, x8
-; CHECK-NEXT:    fmov d7, x10
-; CHECK-NEXT:    mov v0.d[1], x11
-; CHECK-NEXT:    mov v2.d[1], x12
-; CHECK-NEXT:    mov v4.d[1], x9
-; CHECK-NEXT:    mov v1.d[1], x13
-; CHECK-NEXT:    mov v3.d[1], x16
-; CHECK-NEXT:    mov v6.d[1], x17
-; CHECK-NEXT:    mov v5.d[1], x18
-; CHECK-NEXT:    mov v7.d[1], x0
+; CHECK-NEXT:    frintx v2.2s, v2.2s
+; CHECK-NEXT:    frintx v3.2s, v3.2s
+; CHECK-NEXT:    frintx v5.2s, v5.2s
+; CHECK-NEXT:    frintx v4.2s, v4.2s
+; CHECK-NEXT:    frintx v6.2s, v6.2s
+; CHECK-NEXT:    frintx v7.2s, v7.2s
+; CHECK-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-NEXT:    fcvtl v1.2d, v1.2s
+; CHECK-NEXT:    fcvtl v16.2d, v2.2s
+; CHECK-NEXT:    fcvtl v18.2d, v3.2s
+; CHECK-NEXT:    fcvtl v5.2d, v5.2s
+; CHECK-NEXT:    fcvtl v17.2d, v4.2s
+; CHECK-NEXT:    fcvtl v19.2d, v6.2s
+; CHECK-NEXT:    fcvtl v7.2d, v7.2s
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzs v2.2d, v1.2d
+; CHECK-NEXT:    fcvtzs v4.2d, v16.2d
+; CHECK-NEXT:    fcvtzs v6.2d, v18.2d
+; CHECK-NEXT:    fcvtzs v1.2d, v5.2d
+; CHECK-NEXT:    fcvtzs v3.2d, v17.2d
+; CHECK-NEXT:    fcvtzs v5.2d, v19.2d
+; CHECK-NEXT:    fcvtzs v7.2d, v7.2d
 ; CHECK-NEXT:    ret
   %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x)
   ret <16 x i64> %a
@@ -519,118 +474,70 @@ declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
 define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) {
 ; CHECK-LABEL: llrint_v32i64_v32f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext v17.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT:    ext v18.16b, v4.16b, v4.16b, #8
-; CHECK-NEXT:    ext v19.16b, v5.16b, v5.16b, #8
-; CHECK-NEXT:    ext v21.16b, v7.16b, v7.16b, #8
-; CHECK-NEXT:    ext v16.16b, v2.16b, v2.16b, #8
-; CHECK-NEXT:    ext v20.16b, v6.16b, v6.16b, #8
+; CHECK-NEXT:    ext v16.16b, v7.16b, v7.16b, #8
+; CHECK-NEXT:    ext v17.16b, v6.16b, v6.16b, #8
 ; CHECK-NEXT:    frintx v7.2s, v7.2s
-; CHECK-NEXT:    frintx v24.2s, v6.2s
-; CHECK-NEXT:    frintx v23.2s, v5.2s
+; CHECK-NEXT:    frintx v6.2s, v6.2s
+; CHECK-NEXT:    ext v18.16b, v5.16b, v5.16b, #8
+; CHECK-NEXT:    ext v21.16b, v4.16b, v4.16b, #8
+; CHECK-NEXT:    ext v22.16b, v2.16b, v2.16b, #8
+; CHECK-NEXT:    frintx v5.2s, v5.2s
+; CHECK-NEXT:    ext v23.16b, v3.16b, v3.16b, #8
 ; CHECK-NEXT:    frintx v4.2s, v4.2s
-; CHECK-NEXT:    frintx v3.2s, v3.2s
+; CHECK-NEXT:    ext v19.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    ext v20.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    frintx v16.2s, v16.2s
 ; CHECK-NEXT:    frintx v17.2s, v17.2s
+; CHECK-NEXT:    fcvtl v7.2d, v7.2s
+; CHECK-NEXT:    fcvtl v6.2d, v6.2s
 ; CHECK-NEXT:    frintx v18.2s, v18.2s
-; CHECK-NEXT:    frintx v22.2s, v19.2s
 ; CHECK-NEXT:    frintx v21.2s, v21.2s
-; CHECK-NEXT:    frintx v16.2s, v16.2s
-; CHECK-NEXT:    frintx v20.2s, v20.2s
-; CHECK-NEXT:    mov s25, v7.s[1]
-; CHECK-NEXT:    fcvtzs x15, s7
-; CHECK-NEXT:    frintx v19.2s, v1.2s
-; CHECK-NEXT:    fcvtzs x16, s24
-; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT:    fcvtzs x10, s17
-; CHECK-NEXT:    fcvtzs x11, s18
-; CHECK-NEXT:    mov s26, v22.s[1]
-; CHECK-NEXT:    fcvtzs x12, s22
-; CHECK-NEXT:    mov s22, v21.s[1]
-; CHECK-NEXT:    fcvtzs x14, s21
-; CHECK-NEXT:    mov s21, v24.s[1]
-; CHECK-NEXT:    fcvtzs x9, s16
-; CHECK-NEXT:    fcvtzs x13, s20
-; CHECK-NEXT:    mov s20, v20.s[1]
-; CHECK-NEXT:    fmov d24, x15
-; CHECK-NEXT:    mov s18, v18.s[1]
-; CHECK-NEXT:    fmov d6, x10
-; CHECK-NEXT:    fmov d7, x11
-; CHECK-NEXT:    fcvtzs x10, s25
-; CHECK-NEXT:    fcvtzs x11, s22
-; CHECK-NEXT:    fmov d25, x12
-; CHECK-NEXT:    frintx v22.2s, v2.2s
-; CHECK-NEXT:    fcvtzs x15, s21
-; CHECK-NEXT:    fmov d21, x14
-; CHECK-NEXT:    fmov d5, x9
-; CHECK-NEXT:    fcvtzs x9, s26
-; CHECK-NEXT:    fmov d26, x13
-; CHECK-NEXT:    fcvtzs x12, s20
-; CHECK-NEXT:    fcvtzs x13, s19
-; CHECK-NEXT:    mov s20, v23.s[1]
-; CHECK-NEXT:    mov v24.d[1], x10
-; CHECK-NEXT:    mov v21.d[1], x11
-; CHECK-NEXT:    fcvtzs x11, s23
-; CHECK-NEXT:    fcvtzs x10, s22
-; CHECK-NEXT:    mov s17, v17.s[1]
+; CHECK-NEXT:    frintx v2.2s, v2.2s
+; CHECK-NEXT:    frintx v3.2s, v3.2s
+; CHECK-NEXT:    fcvtl v5.2d, v5.2s
+; CHECK-NEXT:    frintx v23.2s, v23.2s
+; CHECK-NEXT:    fcvtl v4.2d, v4.2s
 ; CHECK-NEXT:    frintx v1.2s, v1.2s
-; CHECK-NEXT:    mov s22, v22.s[1]
-; CHECK-NEXT:    mov v26.d[1], x12
-; CHECK-NEXT:    fcvtzs x12, s18
-; CHECK-NEXT:    mov v25.d[1], x9
-; CHECK-NEXT:    fmov d2, x13
-; CHECK-NEXT:    fcvtzs x13, s20
-; CHECK-NEXT:    fmov d20, x16
-; CHECK-NEXT:    stp q24, q21, [x8, #224]
-; CHECK-NEXT:    ext v21.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    fmov d18, x11
-; CHECK-NEXT:    fcvtzs x11, s4
-; CHECK-NEXT:    mov s4, v4.s[1]
-; CHECK-NEXT:    fmov d23, x10
-; CHECK-NEXT:    mov v20.d[1], x15
-; CHECK-NEXT:    fcvtzs x10, s3
-; CHECK-NEXT:    mov s3, v3.s[1]
-; CHECK-NEXT:    mov v18.d[1], x13
+; CHECK-NEXT:    fcvtl v16.2d, v16.2s
+; CHECK-NEXT:    fcvtl v17.2d, v17.2s
+; CHECK-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-NEXT:    fcvtl v18.2d, v18.2s
+; CHECK-NEXT:    fcvtl v21.2d, v21.2s
+; CHECK-NEXT:    frintx v20.2s, v20.2s
+; CHECK-NEXT:    fcvtl v3.2d, v3.2s
+; CHECK-NEXT:    fcvtzs v5.2d, v5.2d
 ; CHECK-NEXT:    frintx v0.2s, v0.2s
-; CHECK-NEXT:    mov s16, v16.s[1]
-; CHECK-NEXT:    frintx v21.2s, v21.2s
-; CHECK-NEXT:    fcvtzs x13, s17
-; CHECK-NEXT:    fcvtzs x14, s22
-; CHECK-NEXT:    fcvtzs x9, s4
-; CHECK-NEXT:    fmov d4, x11
-; CHECK-NEXT:    mov v7.d[1], x12
-; CHECK-NEXT:    stp q20, q26, [x8, #192]
-; CHECK-NEXT:    fmov d20, x10
-; CHECK-NEXT:    fcvtzs x10, s3
-; CHECK-NEXT:    stp q18, q25, [x8, #160]
-; CHECK-NEXT:    mov s18, v19.s[1]
-; CHECK-NEXT:    mov s3, v1.s[1]
-; CHECK-NEXT:    mov s17, v0.s[1]
-; CHECK-NEXT:    mov s19, v21.s[1]
-; CHECK-NEXT:    fcvtzs x11, s21
-; CHECK-NEXT:    mov v4.d[1], x9
-; CHECK-NEXT:    fcvtzs x9, s16
-; CHECK-NEXT:    fcvtzs x12, s1
-; CHECK-NEXT:    mov v6.d[1], x13
-; CHECK-NEXT:    fcvtzs x13, s0
-; CHECK-NEXT:    mov v20.d[1], x10
-; CHECK-NEXT:    fcvtzs x15, s18
-; CHECK-NEXT:    fcvtzs x10, s3
-; CHECK-NEXT:    mov v23.d[1], x14
-; CHECK-NEXT:    fcvtzs x14, s17
-; CHECK-NEXT:    fmov d3, x11
-; CHECK-NEXT:    stp q4, q7, [x8, #128]
-; CHECK-NEXT:    mov v5.d[1], x9
-; CHECK-NEXT:    fcvtzs x9, s19
-; CHECK-NEXT:    stp q20, q6, [x8, #96]
-; CHECK-NEXT:    fmov d0, x12
-; CHECK-NEXT:    fmov d1, x13
-; CHECK-NEXT:    mov v2.d[1], x15
-; CHECK-NEXT:    stp q23, q5, [x8, #64]
-; CHECK-NEXT:    mov v0.d[1], x10
-; CHECK-NEXT:    mov v1.d[1], x14
-; CHECK-NEXT:    mov v3.d[1], x9
-; CHECK-NEXT:    stp q2, q0, [x8, #32]
-; CHECK-NEXT:    stp q1, q3, [x8]
+; CHECK-NEXT:    fcvtl v2.2d, v2.2s
+; CHECK-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-NEXT:    fcvtzs v16.2d, v16.2d
+; CHECK-NEXT:    fcvtzs v17.2d, v17.2d
+; CHECK-NEXT:    fcvtl v1.2d, v1.2s
+; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-NEXT:    stp q6, q17, [x8, #192]
+; CHECK-NEXT:    fcvtl v6.2d, v23.2s
+; CHECK-NEXT:    frintx v17.2s, v19.2s
+; CHECK-NEXT:    stp q7, q16, [x8, #224]
+; CHECK-NEXT:    frintx v7.2s, v22.2s
+; CHECK-NEXT:    fcvtzs v16.2d, v18.2d
+; CHECK-NEXT:    fcvtzs v18.2d, v21.2d
+; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-NEXT:    stp q5, q16, [x8, #160]
+; CHECK-NEXT:    fcvtl v7.2d, v7.2s
+; CHECK-NEXT:    fcvtl v5.2d, v20.2s
+; CHECK-NEXT:    stp q4, q18, [x8, #128]
+; CHECK-NEXT:    fcvtl v4.2d, v17.2s
+; CHECK-NEXT:    stp q3, q6, [x8, #96]
+; CHECK-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-NEXT:    fcvtzs v3.2d, v5.2d
+; CHECK-NEXT:    stp q1, q3, [x8, #32]
+; CHECK-NEXT:    stp q2, q7, [x8, #64]
+; CHECK-NEXT:    fcvtzs v2.2d, v4.2d
+; CHECK-NEXT:    stp q0, q2, [x8]
 ; CHECK-NEXT:    ret
   %a = call <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float> %x)
   ret <32 x i64> %a
diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll
index 44f29f1420fe2..602643264e7be 100644
--- a/llvm/test/CodeGen/AArch64/vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll
@@ -784,11 +784,8 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
 ; CHECK-i64-LABEL: lrint_v2f32:
 ; CHECK-i64:       // %bb.0:
 ; CHECK-i64-NEXT:    frintx v0.2s, v0.2s
-; CHECK-i64-NEXT:    mov s1, v0.s[1]
-; CHECK-i64-NEXT:    fcvtzs x8, s0
-; CHECK-i64-NEXT:    fcvtzs x9, s1
-; CHECK-i64-NEXT:    fmov d0, x8
-; CHECK-i64-NEXT:    mov v0.d[1], x9
+; CHECK-i64-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-i64-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-i64-NEXT:    ret
   %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x)
   ret <2 x iXLen> %a
@@ -807,16 +804,10 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
 ; CHECK-i64-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
 ; CHECK-i64-NEXT:    frintx v0.2s, v0.2s
 ; CHECK-i64-NEXT:    frintx v1.2s, v1.2s
-; CHECK-i64-NEXT:    mov s2, v0.s[1]
-; CHECK-i64-NEXT:    fcvtzs x8, s0
-; CHECK-i64-NEXT:    mov s3, v1.s[1]
-; CHECK-i64-NEXT:    fcvtzs x9, s1
-; CHECK-i64-NEXT:    fcvtzs x10, s2
-; CHECK-i64-NEXT:    fmov d0, x8
-; CHECK-i64-NEXT:    fcvtzs x11, s3
-; CHECK-i64-NEXT:    fmov d1, x9
-; CHECK-i64-NEXT:    mov v0.d[1], x10
-; CHECK-i64-NEXT:    mov v1.d[1], x11
+; CHECK-i64-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-i64-NEXT:    fcvtl v1.2d, v1.2s
+; CHECK-i64-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-i64-NEXT:    fcvtzs v1.2d, v1.2d
 ; CHECK-i64-NEXT:    ret
   %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x)
   ret <4 x iXLen> %a
@@ -840,26 +831,14 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
 ; CHECK-i64-NEXT:    frintx v1.2s, v1.2s
 ; CHECK-i64-NEXT:    frintx v2.2s, v2.2s
 ; CHECK-i64-NEXT:    frintx v3.2s, v3.2s
-; CHECK-i64-NEXT:    mov s4, v0.s[1]
-; CHECK-i64-NEXT:    mov s5, v1.s[1]
-; CHECK-i64-NEXT:    fcvtzs x8, s0
-; CHECK-i64-NEXT:    fcvtzs x10, s1
-; CHECK-i64-NEXT:    mov s6, v2.s[1]
-; CHECK-i64-NEXT:    mov s7, v3.s[1]
-; CHECK-i64-NEXT:    fcvtzs x11, s2
-; CHECK-i64-NEXT:    fcvtzs x12, s3
-; CHECK-i64-NEXT:    fcvtzs x9, s4
-; CHECK-i64-NEXT:    fcvtzs x13, s5
-; CHECK-i64-NEXT:    fmov d0, x8
-; CHECK-i64-NEXT:    fmov d2, x10
-; CHECK-i64-NEXT:    fcvtzs x14, s6
-; CHECK-i64-NEXT:    fcvtzs x15, s7
-; CHECK-i64-NEXT:    fmov d1, x11
-; CHECK-i64-NEXT:    fmov d3, x12
-; CHECK-i64-NEXT:    mov v0.d[1], x9
-; CHECK-i64-NEXT:    mov v2.d[1], x13
-; CHECK-i64-NEXT:    mov v1.d[1], x14
-; CHECK-i64-NEXT:    mov v3.d[1], x15
+; CHECK-i64-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-i64-NEXT:    fcvtl v1.2d, v1.2s
+; CHECK-i64-NEXT:    fcvtl v4.2d, v2.2s
+; CHECK-i64-NEXT:    fcvtl v3.2d, v3.2s
+; CHECK-i64-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-i64-NEXT:    fcvtzs v2.2d, v1.2d
+; CHECK-i64-NEXT:    fcvtzs v1.2d, v4.2d
+; CHECK-i64-NEXT:    fcvtzs v3.2d, v3.2d
 ; CHECK-i64-NEXT:    ret
   %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x)
   ret <8 x iXLen> %a
@@ -881,58 +860,34 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
 ;
 ; CHECK-i64-LABEL: lrint_v16f32:
 ; CHECK-i64:       // %bb.0:
-; CHECK-i64-NEXT:    frintx v4.2s, v0.2s
-; CHECK-i64-NEXT:    frintx v5.2s, v1.2s
-; CHECK-i64-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-i64-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-i64-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
+; CHECK-i64-NEXT:    ext v5.16b, v0.16b, v0.16b, #8
 ; CHECK-i64-NEXT:    ext v6.16b, v2.16b, v2.16b, #8
 ; CHECK-i64-NEXT:    ext v7.16b, v3.16b, v3.16b, #8
-; CHECK-i64-NEXT:    frintx v2.2s, v2.2s
-; CHECK-i64-NEXT:    frintx v3.2s, v3.2s
-; CHECK-i64-NEXT:    mov s16, v4.s[1]
-; CHECK-i64-NEXT:    mov s17, v5.s[1]
-; CHECK-i64-NEXT:    fcvtzs x8, s4
 ; CHECK-i64-NEXT:    frintx v0.2s, v0.2s
 ; CHECK-i64-NEXT:    frintx v1.2s, v1.2s
-; CHECK-i64-NEXT:    fcvtzs x9, s5
-; CHECK-i64-NEXT:    frintx v4.2s, v6.2s
-; CHECK-i64-NEXT:    frintx v5.2s, v7.2s
-; CHECK-i64-NEXT:    fcvtzs x10, s2
-; CHECK-i64-NEXT:    mov s6, v2.s[1]
-; CHECK-i64-NEXT:    fcvtzs x13, s3
-; CHECK-i64-NEXT:    mov s3, v3.s[1]
-; CHECK-i64-NEXT:    fcvtzs x11, s16
-; CHECK-i64-NEXT:    fcvtzs x12, s17
-; CHECK-i64-NEXT:    mov s7, v0.s[1]
-; CHECK-i64-NEXT:    mov s16, v1.s[1]
-; CHECK-i64-NEXT:    fcvtzs x15, s1
-; CHECK-i64-NEXT:    mov s1, v4.s[1]
-; CHECK-i64-NEXT:    mov s17, v5.s[1]
-; CHECK-i64-NEXT:    fcvtzs x14, s0
-; CHECK-i64-NEXT:    fmov d0, x8
-; CHECK-i64-NEXT:    fcvtzs x8, s4
-; CHECK-i64-NEXT:    fmov d4, x10
-; CHECK-i64-NEXT:    fcvtzs x10, s5
-; CHECK-i64-NEXT:    fmov d2, x9
-; CHECK-i64-NEXT:    fcvtzs x9, s6
-; CHECK-i64-NEXT:    fmov d6, x13
-; CHECK-i64-NEXT:    fcvtzs x13, s7
-; CHECK-i64-NEXT:    fcvtzs x16, s16
-; CHECK-i64-NEXT:    fcvtzs x17, s3
-; CHECK-i64-NEXT:    fcvtzs x18, s1
-; CHECK-i64-NEXT:    fcvtzs x0, s17
-; CHECK-i64-NEXT:    fmov d1, x14
-; CHECK-i64-NEXT:    fmov d3, x15
-; CHECK-i64-NEXT:    fmov d5, x8
-; CHECK-i64-NEXT:    fmov d7, x10
-; CHECK-i64-NEXT:    mov v0.d[1], x11
-; CHECK-i64-NEXT:    mov v2.d[1], x12
-; CHECK-i64-NEXT:    mov v4.d[1], x9
-; CHECK-i64-NEXT:    mov v1.d[1], x13
-; CHECK-i64-NEXT:    mov v3.d[1], x16
-; CHECK-i64-NEXT:    mov v6.d[1], x17
-; CHECK-i64-NEXT:    mov v5.d[1], x18
-; CHECK-i64-NEXT:    mov v7.d[1], x0
+; CHECK-i64-NEXT:    frintx v2.2s, v2.2s
+; CHECK-i64-NEXT:    frintx v3.2s, v3.2s
+; CHECK-i64-NEXT:    frintx v5.2s, v5.2s
+; CHECK-i64-NEXT:    frintx v4.2s, v4.2s
+; CHECK-i64-NEXT:    frintx v6.2s, v6.2s
+; CHECK-i64-NEXT:    frintx v7.2s, v7.2s
+; CHECK-i64-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-i64-NEXT:    fcvtl v1.2d, v1.2s
+; CHECK-i64-NEXT:    fcvtl v16.2d, v2.2s
+; CHECK-i64-NEXT:    fcvtl v18.2d, v3.2s
+; CHECK-i64-NEXT:    fcvtl v5.2d, v5.2s
+; CHECK-i64-NEXT:    fcvtl v17.2d, v4.2s
+; CHECK-i64-NEXT:    fcvtl v19.2d, v6.2s
+; CHECK-i64-NEXT:    fcvtl v7.2d, v7.2s
+; CHECK-i64-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-i64-NEXT:    fcvtzs v2.2d, v1.2d
+; CHECK-i64-NEXT:    fcvtzs v4.2d, v16.2d
+; CHECK-i64-NEXT:    fcvtzs v6.2d, v18.2d
+; CHECK-i64-NEXT:    fcvtzs v1.2d, v5.2d
+; CHECK-i64-NEXT:    fcvtzs v3.2d, v17.2d
+; CHECK-i64-NEXT:    fcvtzs v5.2d, v19.2d
+; CHECK-i64-NEXT:    fcvtzs v7.2d, v7.2d
 ; CHECK-i64-NEXT:    ret
   %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x)
   ret <16 x iXLen> %a
@@ -962,118 +917,70 @@ define <32 x iXLen> @lrint_v32f32(<32 x float> %x) {
 ;
 ; CHECK-i64-LABEL: lrint_v32f32:
 ; CHECK-i64:       // %bb.0:
-; CHECK-i64-NEXT:    ext v17.16b, v3.16b, v3.16b, #8
-; CHECK-i64-NEXT:    ext v18.16b, v4.16b, v4.16b, #8
-; CHECK-i64-NEXT:    ext v19.16b, v5.16b, v5.16b, #8
-; CHECK-i64-NEXT:    ext v21.16b, v7.16b, v7.16b, #8
-; CHECK-i64-NEXT:    ext v16.16b, v2.16b, v2.16b, #8
-; CHECK-i64-NEXT:    ext v20.16b, v6.16b, v6.16b, #8
+; CHECK-i64-NEXT:    ext v16.16b, v7.16b, v7.16b, #8
+; CHECK-i64-NEXT:    ext v17.16b, v6.16b, v6.16b, #8
 ; CHECK-i64-NEXT:    frintx v7.2s, v7.2s
-; CHECK-i64-NEXT:    frintx v24.2s, v6.2s
-; CHECK-i64-NEXT:    frintx v23.2s, v5.2s
+; CHECK-i64-NEXT:    frintx v6.2s, v6.2s
+; CHECK-i64-NEXT:    ext v18.16b, v5.16b, v5.16b, #8
+; CHECK-i64-NEXT:    ext v21.16b, v4.16b, v4.16b, #8
+; CHECK-i64-NEXT:    ext v22.16b, v2.16b, v2.16b, #8
+; CHECK-i64-NEXT:    frintx v5.2s, v5.2s
+; CHECK-i64-NEXT:    ext v23.16b, v3.16b, v3.16b, #8
 ; CHECK-i64-NEXT:    frintx v4.2s, v4.2s
-; CHECK-i64-NEXT:    frintx v3.2s, v3.2s
+; CHECK-i64-NEXT:    ext v19.16b, v0.16b, v0.16b, #8
+; CHECK-i64-NEXT:    ext v20.16b, v1.16b, v1.16b, #8
+; CHECK-i64-NEXT:    frintx v16.2s, v16.2s
 ; CHECK-i64-NEXT:    frintx v17.2s, v17.2s
+; CHECK-i64-NEXT:    fcvtl v7.2d, v7.2s
+; CHECK-i64-NEXT:    fcvtl v6.2d, v6.2s
 ; CHECK-i64-NEXT:    frintx v18.2s, v18.2s
-; CHECK-i64-NEXT:    frintx v22.2s, v19.2s
 ; CHECK-i64-NEXT:    frintx v21.2s, v21.2s
-; CHECK-i64-NEXT:    frintx v16.2s, v16.2s
-; CHECK-i64-NEXT:    frintx v20.2s, v20.2s
-; CHECK-i64-NEXT:    mov s25, v7.s[1]
-; CHECK-i64-NEXT:    fcvtzs x15, s7
-; CHECK-i64-NEXT:    frintx v19.2s, v1.2s
-; CHECK-i64-NEXT:    fcvtzs x16, s24
-; CHECK-i64-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-i64-NEXT:    fcvtzs x10, s17
-; CHECK-i64-NEXT:    fcvtzs x11, s18
-; CHECK-i64-NEXT:    mov s26, v22.s[1]
-; CHECK-i64-NEXT:    fcvtzs x12, s22
-; CHECK-i64-NEXT:    mov s22, v21.s[1]
-; CHECK-i64-NEXT:    fcvtzs x14, s21
-; CHECK-i64-NEXT:    mov s21, v24.s[1]
-; CHECK-i64-NEXT:    fcvtzs x9, s16
-; CHECK-i64-NEXT:    fcvtzs x13, s20
-; CHECK-i64-NEXT:    mov s20, v20.s[1]
-; CHECK-i64-NEXT:    fmov d24, x15
-; CHECK-i64-NEXT:    mov s18, v18.s[1]
-; CHECK-i64-NEXT:    fmov d6, x10
-; CHECK-i64-NEXT:    fmov d7, x11
-; CHECK-i64-NEXT:    fcvtzs x10, s25
-; CHECK-i64-NEXT:    fcvtzs x11, s22
-; CHECK-i64-NEXT:    fmov d25, x12
-; CHECK-i64-NEXT:    frintx v22.2s, v2.2s
-; CHECK-i64-NEXT:    fcvtzs x15, s21
-; CHECK-i64-NEXT:    fmov d21, x14
-; CHECK-i64-NEXT:    fmov d5, x9
-; CHECK-i64-NEXT:    fcvtzs x9, s26
-; CHECK-i64-NEXT:    fmov d26, x13
-; CHECK-i64-NEXT:    fcvtzs x12, s20
-; CHECK-i64-NEXT:    fcvtzs x13, s19
-; CHECK-i64-NEXT:    mov s20, v23.s[1]
-; CHECK-i64-NEXT:    mov v24.d[1], x10
-; CHECK-i64-NEXT:    mov v21.d[1], x11
-; CHECK-i64-NEXT:    fcvtzs x11, s23
-; CHECK-i64-NEXT:    fcvtzs x10, s22
-; CHECK-i64-NEXT:    mov s17, v17.s[1]
+; CHECK-i64-NEXT:    frintx v2.2s, v2.2s
+; CHECK-i64-NEXT:    frintx v3.2s, v3.2s
+; CHECK-i64-NEXT:    fcvtl v5.2d, v5.2s
+; CHECK-i64-NEXT:    frintx v23.2s, v23.2s
+; CHECK-i64-NEXT:    fcvtl v4.2d, v4.2s
 ; CHECK-i64-NEXT:    frintx v1.2s, v1.2s
-; CHECK-i64-NEXT:    mov s22, v22.s[1]
-; CHECK-i64-NEXT:    mov v26.d[1], x12
-; CHECK-i64-NEXT:    fcvtzs x12, s18
-; CHECK-i64-NEXT:    mov v25.d[1], x9
-; CHECK-i64-NEXT:    fmov d2, x13
-; CHECK-i64-NEXT:    fcvtzs x13, s20
-; CHECK-i64-NEXT:    fmov d20, x16
-; CHECK-i64-NEXT:    stp q24, q21, [x8, #224]
-; CHECK-i64-NEXT:    ext v21.16b, v0.16b, v0.16b, #8
-; CHECK-i64-NEXT:    fmov d18, x11
-; CHECK-i64-NEXT:    fcvtzs x11, s4
-; CHECK-i64-NEXT:    mov s4, v4.s[1]
-; CHECK-i64-NEXT:    fmov d23, x10
-; CHECK-i64-NEXT:    mov v20.d[1], x15
-; CHECK-i64-NEXT:    fcvtzs x10, s3
-; CHECK-i64-NEXT:    mov s3, v3.s[1]
-; CHECK-i64-NEXT:    mov v18.d[1], x13
+; CHECK-i64-NEXT:    fcvtl v16.2d, v16.2s
+; CHECK-i64-NEXT:    fcvtl v17.2d, v17.2s
+; CHECK-i64-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-i64-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-i64-NEXT:    fcvtl v18.2d, v18.2s
+; CHECK-i64-NEXT:    fcvtl v21.2d, v21.2s
+; CHECK-i64-NEXT:    frintx v20.2s, v20.2s
+; CHECK-i64-NEXT:    fcvtl v3.2d, v3.2s
+; CHECK-i64-NEXT:    fcvtzs v5.2d, v5.2d
 ; CHECK-i64-NEXT:    frintx v0.2s, v0.2s
-; CHECK-i64-NEXT:    mov s16, v16.s[1]
-; CHECK-i64-NEXT:    frintx v21.2s, v21.2s
-; CHECK-i64-NEXT:    fcvtzs x13, s17
-; CHECK-i64-NEXT:    fcvtzs x14, s22
-; CHECK-i64-NEXT:    fcvtzs x9, s4
-; CHECK-i64-NEXT:    fmov d4, x11
-; CHECK-i64-NEXT:    mov v7.d[1], x12
-; CHECK-i64-NEXT:    stp q20, q26, [x8, #192]
-; CHECK-i64-NEXT:    fmov d20, x10
-; CHECK-i64-NEXT:    fcvtzs x10, s3
-; CHECK-i64-NEXT:    stp q18, q25, [x8, #160]
-; CHECK-i64-NEXT:    mov s18, v19.s[1]
-; CHECK-i64-NEXT:    mov s3, v1.s[1]
-; CHECK-i64-NEXT:    mov s17, v0.s[1]
-; CHECK-i64-NEXT:    mov s19, v21.s[1]
-; CHECK-i64-NEXT:    fcvtzs x11, s21
-; CHECK-i64-NEXT:    mov v4.d[1], x9
-; CHECK-i64-NEXT:    fcvtzs x9, s16
-; CHECK-i64-NEXT:    fcvtzs x12, s1
-; CHECK-i64-NEXT:    mov v6.d[1], x13
-; CHECK-i64-NEXT:    fcvtzs x13, s0
-; CHECK-i64-NEXT:    mov v20.d[1], x10
-; CHECK-i64-NEXT:    fcvtzs x15, s18
-; CHECK-i64-NEXT:    fcvtzs x10, s3
-; CHECK-i64-NEXT:    mov v23.d[1], x14
-; CHECK-i64-NEXT:    fcvtzs x14, s17
-; CHECK-i64-NEXT:    fmov d3, x11
-; CHECK-i64-NEXT:    stp q4, q7, [x8, #128]
-; CHECK-i64-NEXT:    mov v5.d[1], x9
-; CHECK-i64-NEXT:    fcvtzs x9, s19
-; CHECK-i64-NEXT:    stp q20, q6, [x8, #96]
-; CHECK-i64-NEXT:    fmov d0, x12
-; CHECK-i64-NEXT:    fmov d1, x13
-; CHECK-i64-NEXT:    mov v2.d[1], x15
-; CHECK-i64-NEXT:    stp q23, q5, [x8, #64]
-; CHECK-i64-NEXT:    mov v0.d[1], x10
-; CHECK-i64-NEXT:    mov v1.d[1], x14
-; CHECK-i64-NEXT:    mov v3.d[1], x9
-; CHECK-i64-NEXT:    stp q2, q0, [x8, #32]
-; CHECK-i64-NEXT:    stp q1, q3, [x8]
+; CHECK-i64-NEXT:    fcvtl v2.2d, v2.2s
+; CHECK-i64-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-i64-NEXT:    fcvtzs v16.2d, v16.2d
+; CHECK-i64-NEXT:    fcvtzs v17.2d, v17.2d
+; CHECK-i64-NEXT:    fcvtl v1.2d, v1.2s
+; CHECK-i64-NEXT:    fcvtzs v3.2d, v3.2d
+; CHECK-i64-NEXT:    fcvtl v0.2d, v0.2s
+; CHECK-i64-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-i64-NEXT:    stp q6, q17, [x8, #192]
+; CHECK-i64-NEXT:    fcvtl v6.2d, v23.2s
+; CHECK-i64-NEXT:    frintx v17.2s, v19.2s
+; CHECK-i64-NEXT:    stp q7, q16, [x8, #224]
+; CHECK-i64-NEXT:    frintx v7.2s, v22.2s
+; CHECK-i64-NEXT:    fcvtzs v16.2d, v18.2d
+; CHECK-i64-NEXT:    fcvtzs v18.2d, v21.2d
+; CHECK-i64-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-i64-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-i64-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-i64-NEXT:    stp q5, q16, [x8, #160]
+; CHECK-i64-NEXT:    fcvtl v7.2d, v7.2s
+; CHECK-i64-NEXT:    fcvtl v5.2d, v20.2s
+; CHECK-i64-NEXT:    stp q4, q18, [x8, #128]
+; CHECK-i64-NEXT:    fcvtl v4.2d, v17.2s
+; CHECK-i64-NEXT:    stp q3, q6, [x8, #96]
+; CHECK-i64-NEXT:    fcvtzs v7.2d, v7.2d
+; CHECK-i64-NEXT:    fcvtzs v3.2d, v5.2d
+; CHECK-i64-NEXT:    stp q1, q3, [x8, #32]
+; CHECK-i64-NEXT:    stp q2, q7, [x8, #64]
+; CHECK-i64-NEXT:    fcvtzs v2.2d, v4.2d
+; CHECK-i64-NEXT:    stp q0, q2, [x8]
 ; CHECK-i64-NEXT:    ret
   %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float> %x)
   ret <32 x iXLen> %a


