[llvm] [PowerPC] Change `half` to use soft promotion rather than `PromoteFloat` (PR #152632)
Trevor Gross via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 8 06:16:27 PDT 2025
https://github.com/tgross35 updated https://github.com/llvm/llvm-project/pull/152632
>From b8b124602fd17c4fa5dc898a843171d161afe524 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Fri, 8 Aug 2025 03:15:39 -0500
Subject: [PATCH 1/4] [Test] Add and update tests for lrint
A number of backends are missing either all tests for lrint, or
specifically those for `f16`, which currently crash for `softPromoteHalf`
targets. For a number of popular backends, do the following:
* Ensure f16, f32, f64, and f128 are all covered
* Ensure both a 32- and 64-bit target are tested, if relevant
* Add `nounwind` to clean up CFI output
* Add a test covering the above if one did not exist
Squashed from commits on other branches:
Update existing vector tests
Add f128 to vector tests
Add a vector test to arm
nounwind for vector tests since cfi directives are causing CI failures
---
.../AArch64/sve-fixed-vector-llrint.ll | 682 +-
.../CodeGen/AArch64/sve-fixed-vector-lrint.ll | 1135 +-
llvm/test/CodeGen/AArch64/vector-llrint.ll | 537 +-
llvm/test/CodeGen/AArch64/vector-lrint.ll | 955 +-
llvm/test/CodeGen/ARM/llrint-conv.ll | 21 +
llvm/test/CodeGen/ARM/lrint-conv.ll | 18 +
llvm/test/CodeGen/ARM/vector-llrint.ll | 11126 +++++++++++++
llvm/test/CodeGen/ARM/vector-lrint.ll | 13251 ++++++++++++++++
llvm/test/CodeGen/AVR/llrint.ll | 18 +
llvm/test/CodeGen/AVR/lrint.ll | 18 +
llvm/test/CodeGen/LoongArch/lrint-conv.ll | 96 +
llvm/test/CodeGen/MSP430/lrint-conv.ll | 60 +
llvm/test/CodeGen/Mips/llrint-conv.ll | 15 +
llvm/test/CodeGen/Mips/lrint-conv.ll | 15 +
llvm/test/CodeGen/PowerPC/llrint-conv.ll | 32 +
llvm/test/CodeGen/PowerPC/lrint-conv.ll | 32 +
llvm/test/CodeGen/PowerPC/vector-llrint.ll | 1252 +-
llvm/test/CodeGen/PowerPC/vector-lrint.ll | 2897 +++-
llvm/test/CodeGen/RISCV/lrint-conv.ll | 76 +
llvm/test/CodeGen/SPARC/lrint-conv.ll | 68 +
llvm/test/CodeGen/WebAssembly/lrint-conv.ll | 62 +
llvm/test/CodeGen/X86/llrint-conv.ll | 128 +-
llvm/test/CodeGen/X86/lrint-conv-i32.ll | 74 +-
llvm/test/CodeGen/X86/lrint-conv-i64.ll | 34 +-
llvm/test/CodeGen/X86/vector-llrint-f16.ll | 15 +-
llvm/test/CodeGen/X86/vector-llrint.ll | 1188 +-
llvm/test/CodeGen/X86/vector-lrint-f16.ll | 12 +-
llvm/test/CodeGen/X86/vector-lrint.ll | 1714 +-
28 files changed, 34257 insertions(+), 1274 deletions(-)
create mode 100644 llvm/test/CodeGen/ARM/vector-llrint.ll
create mode 100644 llvm/test/CodeGen/ARM/vector-lrint.ll
create mode 100644 llvm/test/CodeGen/LoongArch/lrint-conv.ll
create mode 100644 llvm/test/CodeGen/MSP430/lrint-conv.ll
create mode 100644 llvm/test/CodeGen/RISCV/lrint-conv.ll
create mode 100644 llvm/test/CodeGen/SPARC/lrint-conv.ll
create mode 100644 llvm/test/CodeGen/WebAssembly/lrint-conv.ll
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
index 7f144df499be0..38ba9240d15b4 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve -aarch64-sve-vector-bits-min=256 | FileCheck %s
-define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
+define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind {
; CHECK-LABEL: llrint_v1i64_v1f16:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx h0, h0
@@ -13,7 +13,7 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
}
declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>)
-define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
+define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind {
; CHECK-LABEL: llrint_v1i64_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
@@ -30,7 +30,7 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
}
declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>)
-define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
+define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind {
; CHECK-LABEL: llrint_v4i64_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.4h, v0.4h
@@ -51,7 +51,7 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
}
declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>)
-define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
+define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind {
; CHECK-LABEL: llrint_v8i64_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
@@ -85,7 +85,7 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
}
declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>)
-define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
+define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind {
; CHECK-LABEL: llrint_v16i64_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v2.16b, v1.16b, v1.16b, #8
@@ -144,16 +144,13 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
}
declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>)
-define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
+define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
; CHECK-LABEL: llrint_v32i64_v32f16:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: sub x9, sp, #272
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0
-; CHECK-NEXT: .cfi_def_cfa w29, 16
-; CHECK-NEXT: .cfi_offset w30, -8
-; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: frintx v5.4h, v0.4h
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8
@@ -278,7 +275,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
}
declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>)
-define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
+define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) nounwind {
; CHECK-LABEL: llrint_v1i64_v1f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
@@ -291,7 +288,7 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
}
declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
-define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
+define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: llrint_v2i64_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.2s, v0.2s
@@ -303,7 +300,7 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
}
declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
-define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
+define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) nounwind {
; CHECK-LABEL: llrint_v4i64_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.4s, v0.4s
@@ -324,7 +321,7 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
}
declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
-define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
+define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) nounwind {
; CHECK-LABEL: llrint_v8i64_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.4s, v0.4s
@@ -357,7 +354,7 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
}
declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
-define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
+define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) nounwind {
; CHECK-LABEL: llrint_v16i64_v16f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v3.4s, v3.4s
@@ -414,16 +411,13 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
}
declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
-define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) {
+define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) nounwind {
; CHECK-LABEL: llrint_v32i64_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: sub x9, sp, #272
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0
-; CHECK-NEXT: .cfi_def_cfa w29, 16
-; CHECK-NEXT: .cfi_offset w30, -8
-; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: frintx v0.4s, v0.4s
; CHECK-NEXT: frintx v1.4s, v1.4s
; CHECK-NEXT: frintx v2.4s, v2.4s
@@ -544,7 +538,7 @@ define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) {
}
declare <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float>)
-define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
+define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) nounwind {
; CHECK-LABEL: llrint_v1i64_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx d0, d0
@@ -556,7 +550,7 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
}
declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>)
-define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
+define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) nounwind {
; CHECK-LABEL: llrint_v2i64_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.2d, v0.2d
@@ -567,7 +561,7 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
}
declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)
-define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
+define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) nounwind {
; CHECK-LABEL: llrint_v4i64_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
@@ -593,7 +587,7 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
}
declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
-define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
+define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) nounwind {
; CHECK-LABEL: llrint_v8i64_v8f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
@@ -635,7 +629,7 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
}
declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
-define <16 x i64> @llrint_v16f64(<16 x double> %x) {
+define <16 x i64> @llrint_v16f64(<16 x double> %x) nounwind {
; CHECK-LABEL: llrint_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d, vl2
@@ -708,16 +702,13 @@ define <16 x i64> @llrint_v16f64(<16 x double> %x) {
}
declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>)
-define <32 x i64> @llrint_v32f64(<32 x double> %x) {
+define <32 x i64> @llrint_v32f64(<32 x double> %x) nounwind {
; CHECK-LABEL: llrint_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: sub x9, sp, #272
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0
-; CHECK-NEXT: .cfi_def_cfa w29, 16
-; CHECK-NEXT: .cfi_offset w30, -8
-; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p1.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
@@ -861,3 +852,640 @@ define <32 x i64> @llrint_v32f64(<32 x double> %x) {
ret <32 x i64> %a
}
declare <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double>)
+
+define <1 x i64> @llrint_v1i64_v1fp128(<1 x fp128> %x) nounwind {
+; CHECK-LABEL: llrint_v1i64_v1fp128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %a = call <1 x i64> @llvm.llrint.v1i64.v1fp128(<1 x fp128> %x)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1fp128(<1 x fp128>)
+
+define <2 x i64> @llrint_v2i64_v2fp128(<2 x fp128> %x) nounwind {
+; CHECK-LABEL: llrint_v2i64_v2fp128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+ %a = call <2 x i64> @llvm.llrint.v2i64.v2fp128(<2 x fp128> %x)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2fp128(<2 x fp128>)
+
+define <4 x i64> @llrint_v4i64_v4fp128(<4 x fp128> %x) nounwind {
+; CHECK-LABEL: llrint_v4i64_v4fp128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: mov v0.16b, v3.16b
+; CHECK-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #64
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #64
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ %a = call <4 x i64> @llvm.llrint.v4i64.v4fp128(<4 x fp128> %x)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4fp128(<4 x fp128>)
+
+define <8 x i64> @llrint_v8i64_v8fp128(<8 x fp128> %x) nounwind {
+; CHECK-LABEL: llrint_v8i64_v8fp128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #128
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT: mov v0.16b, v7.16b
+; CHECK-NEXT: stp q6, q5, [sp, #16] // 32-byte Folded Spill
+; CHECK-NEXT: stp q4, q3, [sp, #48] // 32-byte Folded Spill
+; CHECK-NEXT: stp q2, q1, [sp, #80] // 32-byte Folded Spill
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #128
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #128
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #128
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #128
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldr z2, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: mov z3.d, z2.d
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16
+; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2
+; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: add sp, sp, #128
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ %a = call <8 x i64> @llvm.llrint.v8i64.v8fp128(<8 x fp128> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8fp128(<8 x fp128>)
+
+define <16 x i64> @llrint_v16fp128(<16 x fp128> %x) nounwind {
+; CHECK-LABEL: llrint_v16fp128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #256
+; CHECK-NEXT: addvl sp, sp, #-4
+; CHECK-NEXT: addvl x8, sp, #4
+; CHECK-NEXT: str q1, [sp, #240] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q1, [x8, #272]
+; CHECK-NEXT: addvl x8, sp, #4
+; CHECK-NEXT: str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEXT: stp q7, q6, [sp, #128] // 32-byte Folded Spill
+; CHECK-NEXT: str q1, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q1, [x8, #288]
+; CHECK-NEXT: addvl x8, sp, #4
+; CHECK-NEXT: stp q5, q4, [sp, #160] // 32-byte Folded Spill
+; CHECK-NEXT: str q1, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q1, [x8, #304]
+; CHECK-NEXT: addvl x8, sp, #4
+; CHECK-NEXT: stp q3, q2, [sp, #192] // 32-byte Folded Spill
+; CHECK-NEXT: str q1, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q1, [x8, #320]
+; CHECK-NEXT: addvl x8, sp, #4
+; CHECK-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q1, [x8, #336]
+; CHECK-NEXT: addvl x8, sp, #4
+; CHECK-NEXT: str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q1, [x8, #352]
+; CHECK-NEXT: addvl x8, sp, #4
+; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q1, [x8, #368]
+; CHECK-NEXT: addvl x8, sp, #4
+; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q1, [x8, #384]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #256
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #256
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #256
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #256
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8, #2, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #256
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #256
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #256
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #240] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #256
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldr z2, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z4, [x8, #2, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z6, [x8, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: mov z3.d, z2.d
+; CHECK-NEXT: mov z5.d, z4.d
+; CHECK-NEXT: mov z7.d, z6.d
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16
+; CHECK-NEXT: ext z5.b, z5.b, z4.b, #16
+; CHECK-NEXT: ext z7.b, z7.b, z6.b, #16
+; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2
+; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4
+; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3
+; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5
+; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6
+; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT: addvl sp, sp, #4
+; CHECK-NEXT: add sp, sp, #256
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ %a = call <16 x i64> @llvm.llrint.v16i64.v16fp128(<16 x fp128> %x)
+ ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.llrint.v16i64.v16fp128(<16 x fp128>)
+
+define <32 x i64> @llrint_v32fp128(<32 x fp128> %x) nounwind {
+; CHECK-LABEL: llrint_v32fp128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #512
+; CHECK-NEXT: addvl sp, sp, #-8
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill
+; CHECK-NEXT: mov x19, x8
+; CHECK-NEXT: stp q0, q7, [sp, #48] // 32-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #864]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q6, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: str q0, [sp, #496] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #880]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: stp q5, q4, [sp, #128] // 32-byte Folded Spill
+; CHECK-NEXT: str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #896]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #912]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #800]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #432] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #816]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #832]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #848]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #384] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #736]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #368] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #752]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #352] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #768]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #336] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #784]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #320] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #672]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #304] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #688]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #288] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #704]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #272] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #720]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #256] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #608]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #624]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #640]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #656]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #544]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #176] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #560]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #576]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #592]
+; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: mov v0.16b, v3.16b
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #512
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8, #7, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #512
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8, #7, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: str z0, [x8, #7, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #512
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8, #6, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #512
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8, #6, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: str z0, [x8, #6, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #512
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8, #5, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #512
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8, #5, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: str z0, [x8, #5, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #512
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8, #4, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #224] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #512
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8, #4, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: str z0, [x8, #4, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #256] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #256] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #256] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #512
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #288] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #288] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #304] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #288] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #512
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #320] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #336] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #320] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #512
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #352] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #352] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #512
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8, #2, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #384] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #384] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #512
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #416] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #512
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #448] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #512
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #480] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #512
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: add x9, sp, #512
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: mov x8, #28 // =0x1c
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d, vl4
+; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-NEXT: mov x8, #24 // =0x18
+; CHECK-NEXT: ldr z0, [x9, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-NEXT: mov x8, #20 // =0x14
+; CHECK-NEXT: ldr z0, [x9, #2, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-NEXT: mov x8, #16 // =0x10
+; CHECK-NEXT: ldr z0, [x9, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-NEXT: mov x8, #12 // =0xc
+; CHECK-NEXT: ldr z0, [x9, #4, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-NEXT: mov x8, #8 // =0x8
+; CHECK-NEXT: ldr z0, [x9, #5, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-NEXT: mov x8, #4 // =0x4
+; CHECK-NEXT: ldr z0, [x9, #6, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-NEXT: add x8, sp, #512
+; CHECK-NEXT: ldr z0, [x8, #7, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: st1d { z0.d }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #8
+; CHECK-NEXT: add sp, sp, #512
+; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+  %a = call <32 x i64> @llvm.llrint.v32i64.v32fp128(<32 x fp128> %x)
+ ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.llrint.v32i64.v32fp128(<32 x fp128>)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
index 9fe8d92a182ac..175f4993d06c9 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
@@ -4,7 +4,7 @@
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=aarch64 -mattr=+sve \
; RUN: -aarch64-sve-vector-bits-min=256 | FileCheck --check-prefixes=CHECK-i64 %s
-define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
+define <1 x iXLen> @lrint_v1f16(<1 x half> %x) nounwind {
; CHECK-i32-LABEL: lrint_v1f16:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx h0, h0
@@ -23,7 +23,7 @@ define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
}
declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>)
-define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
+define <2 x iXLen> @lrint_v2f16(<2 x half> %x) nounwind {
; CHECK-i32-LABEL: lrint_v2f16:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: // kill: def $d0 killed $d0 def $q0
@@ -53,7 +53,7 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
}
declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>)
-define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
+define <4 x iXLen> @lrint_v4f16(<4 x half> %x) nounwind {
; CHECK-i32-LABEL: lrint_v4f16:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx v0.4h, v0.4h
@@ -81,7 +81,7 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
}
declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>)
-define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
+define <8 x iXLen> @lrint_v8f16(<8 x half> %x) nounwind {
; CHECK-i32-LABEL: lrint_v8f16:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx v2.8h, v0.8h
@@ -143,7 +143,7 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
}
declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>)
-define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
+define <16 x iXLen> @lrint_v16f16(<16 x half> %x) nounwind {
; CHECK-i32-LABEL: lrint_v16f16:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx v1.8h, v1.8h
@@ -254,26 +254,17 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
}
declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>)
-define <32 x iXLen> @lrint_v32f16(<32 x half> %x) {
+define <32 x iXLen> @lrint_v32f16(<32 x half> %x) nounwind {
; CHECK-i32-LABEL: lrint_v32f16:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: stp x26, x25, [sp, #-64]! // 16-byte Folded Spill
-; CHECK-i32-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
-; CHECK-i32-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; CHECK-i32-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-i32-NEXT: .cfi_def_cfa_offset 64
-; CHECK-i32-NEXT: .cfi_offset w19, -8
-; CHECK-i32-NEXT: .cfi_offset w20, -16
-; CHECK-i32-NEXT: .cfi_offset w21, -24
-; CHECK-i32-NEXT: .cfi_offset w22, -32
-; CHECK-i32-NEXT: .cfi_offset w23, -40
-; CHECK-i32-NEXT: .cfi_offset w24, -48
-; CHECK-i32-NEXT: .cfi_offset w25, -56
-; CHECK-i32-NEXT: .cfi_offset w26, -64
; CHECK-i32-NEXT: frintx v3.8h, v3.8h
; CHECK-i32-NEXT: frintx v2.8h, v2.8h
+; CHECK-i32-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-i32-NEXT: frintx v1.8h, v1.8h
; CHECK-i32-NEXT: frintx v0.8h, v0.8h
+; CHECK-i32-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
+; CHECK-i32-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
; CHECK-i32-NEXT: mov h4, v3.h[7]
; CHECK-i32-NEXT: mov h5, v3.h[6]
; CHECK-i32-NEXT: mov h6, v3.h[5]
@@ -378,9 +369,6 @@ define <32 x iXLen> @lrint_v32f16(<32 x half> %x) {
; CHECK-i64-NEXT: sub x9, sp, #272
; CHECK-i64-NEXT: mov x29, sp
; CHECK-i64-NEXT: and sp, x9, #0xffffffffffffffe0
-; CHECK-i64-NEXT: .cfi_def_cfa w29, 16
-; CHECK-i64-NEXT: .cfi_offset w30, -8
-; CHECK-i64-NEXT: .cfi_offset w29, -16
; CHECK-i64-NEXT: frintx v5.4h, v0.4h
; CHECK-i64-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-i64-NEXT: ext v4.16b, v1.16b, v1.16b, #8
@@ -505,7 +493,7 @@ define <32 x iXLen> @lrint_v32f16(<32 x half> %x) {
}
declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half>)
-define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
+define <1 x iXLen> @lrint_v1f32(<1 x float> %x) nounwind {
; CHECK-i32-LABEL: lrint_v1f32:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx v0.2s, v0.2s
@@ -524,7 +512,7 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
}
declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>)
-define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
+define <2 x iXLen> @lrint_v2f32(<2 x float> %x) nounwind {
; CHECK-i32-LABEL: lrint_v2f32:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx v0.2s, v0.2s
@@ -542,7 +530,7 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
}
declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>)
-define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
+define <4 x iXLen> @lrint_v4f32(<4 x float> %x) nounwind {
; CHECK-i32-LABEL: lrint_v4f32:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx v0.4s, v0.4s
@@ -569,7 +557,7 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
}
declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>)
-define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
+define <8 x iXLen> @lrint_v8f32(<8 x float> %x) nounwind {
; CHECK-i32-LABEL: lrint_v8f32:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: ptrue p0.d, vl2
@@ -636,7 +624,7 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
}
declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>)
-define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
+define <16 x iXLen> @lrint_v16f32(<16 x float> %x) nounwind {
; CHECK-i32-LABEL: lrint_v16f32:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: ptrue p0.d, vl2
@@ -754,24 +742,10 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
}
declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)
-define <32 x iXLen> @lrint_v32f32(<32 x float> %x) {
+define <32 x iXLen> @lrint_v32f32(<32 x float> %x) nounwind {
; CHECK-i32-LABEL: lrint_v32f32:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: str x27, [sp, #-80]! // 8-byte Folded Spill
-; CHECK-i32-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill
-; CHECK-i32-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill
-; CHECK-i32-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-i32-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-i32-NEXT: .cfi_def_cfa_offset 80
-; CHECK-i32-NEXT: .cfi_offset w19, -8
-; CHECK-i32-NEXT: .cfi_offset w20, -16
-; CHECK-i32-NEXT: .cfi_offset w21, -24
-; CHECK-i32-NEXT: .cfi_offset w22, -32
-; CHECK-i32-NEXT: .cfi_offset w23, -40
-; CHECK-i32-NEXT: .cfi_offset w24, -48
-; CHECK-i32-NEXT: .cfi_offset w25, -56
-; CHECK-i32-NEXT: .cfi_offset w26, -64
-; CHECK-i32-NEXT: .cfi_offset w27, -80
; CHECK-i32-NEXT: ptrue p1.d, vl2
; CHECK-i32-NEXT: // kill: def $q6 killed $q6 def $z6
; CHECK-i32-NEXT: // kill: def $q7 killed $q7 def $z7
@@ -781,11 +755,15 @@ define <32 x iXLen> @lrint_v32f32(<32 x float> %x) {
; CHECK-i32-NEXT: // kill: def $q5 killed $q5 def $z5
; CHECK-i32-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-i32-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-i32-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill
; CHECK-i32-NEXT: ptrue p0.s, vl8
+; CHECK-i32-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
; CHECK-i32-NEXT: splice z6.d, p1, z6.d, z7.d
; CHECK-i32-NEXT: splice z2.d, p1, z2.d, z3.d
; CHECK-i32-NEXT: splice z4.d, p1, z4.d, z5.d
; CHECK-i32-NEXT: splice z0.d, p1, z0.d, z1.d
+; CHECK-i32-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill
+; CHECK-i32-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-i32-NEXT: movprfx z3, z6
; CHECK-i32-NEXT: frintx z3.s, p0/m, z6.s
; CHECK-i32-NEXT: frintx z2.s, p0/m, z2.s
@@ -897,9 +875,6 @@ define <32 x iXLen> @lrint_v32f32(<32 x float> %x) {
; CHECK-i64-NEXT: sub x9, sp, #272
; CHECK-i64-NEXT: mov x29, sp
; CHECK-i64-NEXT: and sp, x9, #0xffffffffffffffe0
-; CHECK-i64-NEXT: .cfi_def_cfa w29, 16
-; CHECK-i64-NEXT: .cfi_offset w30, -8
-; CHECK-i64-NEXT: .cfi_offset w29, -16
; CHECK-i64-NEXT: frintx v0.4s, v0.4s
; CHECK-i64-NEXT: frintx v1.4s, v1.4s
; CHECK-i64-NEXT: frintx v2.4s, v2.4s
@@ -1020,7 +995,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x float> %x) {
}
declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float>)
-define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
+define <1 x iXLen> @lrint_v1f64(<1 x double> %x) nounwind {
; CHECK-i32-LABEL: lrint_v1f64:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx d0, d0
@@ -1039,7 +1014,7 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
}
declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
-define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
+define <2 x iXLen> @lrint_v2f64(<2 x double> %x) nounwind {
; CHECK-i32-LABEL: lrint_v2f64:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx v0.2d, v0.2d
@@ -1061,7 +1036,7 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
}
declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
-define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
+define <4 x iXLen> @lrint_v4f64(<4 x double> %x) nounwind {
; CHECK-i32-LABEL: lrint_v4f64:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: ptrue p0.d, vl2
@@ -1109,7 +1084,7 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
}
declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>)
-define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
+define <8 x iXLen> @lrint_v8f64(<8 x double> %x) nounwind {
; CHECK-i32-LABEL: lrint_v8f64:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: ptrue p0.d, vl2
@@ -1188,7 +1163,7 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
}
declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)
-define <16 x iXLen> @lrint_v16f64(<16 x double> %x) {
+define <16 x iXLen> @lrint_v16f64(<16 x double> %x) nounwind {
; CHECK-i32-LABEL: lrint_v16f64:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: ptrue p1.d, vl2
@@ -1329,7 +1304,7 @@ define <16 x iXLen> @lrint_v16f64(<16 x double> %x) {
}
declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double>)
-define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
+define <32 x iXLen> @lrint_v32f64(<32 x double> %x) nounwind {
; CHECK-i32-LABEL: lrint_v32f64:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: ptrue p1.d, vl2
@@ -1465,9 +1440,6 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
; CHECK-i64-NEXT: sub x9, sp, #272
; CHECK-i64-NEXT: mov x29, sp
; CHECK-i64-NEXT: and sp, x9, #0xffffffffffffffe0
-; CHECK-i64-NEXT: .cfi_def_cfa w29, 16
-; CHECK-i64-NEXT: .cfi_offset w30, -8
-; CHECK-i64-NEXT: .cfi_offset w29, -16
; CHECK-i64-NEXT: ptrue p1.d, vl2
; CHECK-i64-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-i64-NEXT: // kill: def $q1 killed $q1 def $z1
@@ -1611,3 +1583,1060 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
ret <32 x iXLen> %a
}
declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>)
+
+define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) nounwind {
+; CHECK-i32-LABEL: lrint_v1fp128:
+; CHECK-i32: // %bb.0:
+; CHECK-i32-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-i32-NEXT: ret
+;
+; CHECK-i64-LABEL: lrint_v1fp128:
+; CHECK-i64: // %bb.0:
+; CHECK-i64-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-i64-NEXT: ret
+ %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128> %x)
+ ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>)
+
+define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) nounwind {
+; CHECK-i32-LABEL: lrint_v2fp128:
+; CHECK-i32: // %bb.0:
+; CHECK-i32-NEXT: sub sp, sp, #48
+; CHECK-i32-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i32-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[1], w0
+; CHECK-i32-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-i32-NEXT: add sp, sp, #48
+; CHECK-i32-NEXT: ret
+;
+; CHECK-i64-LABEL: lrint_v2fp128:
+; CHECK-i64: // %bb.0:
+; CHECK-i64-NEXT: sub sp, sp, #48
+; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-i64-NEXT: mov v0.16b, v1.16b
+; CHECK-i64-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: add sp, sp, #48
+; CHECK-i64-NEXT: ret
+ %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128> %x)
+ ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>)
+
+define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) nounwind {
+; CHECK-i32-LABEL: lrint_v4fp128:
+; CHECK-i32: // %bb.0:
+; CHECK-i32-NEXT: sub sp, sp, #80
+; CHECK-i32-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-i32-NEXT: stp q2, q3, [sp, #16] // 32-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[1], w0
+; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[2], w0
+; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[3], w0
+; CHECK-i32-NEXT: add sp, sp, #80
+; CHECK-i32-NEXT: ret
+;
+; CHECK-i64-LABEL: lrint_v4fp128:
+; CHECK-i64: // %bb.0:
+; CHECK-i64-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-i64-NEXT: sub sp, sp, #64
+; CHECK-i64-NEXT: addvl sp, sp, #-1
+; CHECK-i64-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i64-NEXT: mov v0.16b, v3.16b
+; CHECK-i64-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #64
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #64
+; CHECK-i64-NEXT: ptrue p0.d, vl2
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT: mov z1.d, z0.d
+; CHECK-i64-NEXT: ext z1.b, z1.b, z0.b, #16
+; CHECK-i64-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-i64-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-i64-NEXT: addvl sp, sp, #1
+; CHECK-i64-NEXT: add sp, sp, #64
+; CHECK-i64-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-i64-NEXT: ret
+ %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> %x)
+ ret <4 x iXLen> %a
+}
+declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>)
+
+define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) nounwind {
+; CHECK-i32-LABEL: lrint_v8fp128:
+; CHECK-i32: // %bb.0:
+; CHECK-i32-NEXT: sub sp, sp, #176
+; CHECK-i32-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i32-NEXT: mov v0.16b, v7.16b
+; CHECK-i32-NEXT: stp x30, x25, [sp, #112] // 16-byte Folded Spill
+; CHECK-i32-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill
+; CHECK-i32-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill
+; CHECK-i32-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill
+; CHECK-i32-NEXT: stp q6, q5, [sp] // 32-byte Folded Spill
+; CHECK-i32-NEXT: stp q4, q3, [sp, #32] // 32-byte Folded Spill
+; CHECK-i32-NEXT: stp q2, q1, [sp, #64] // 32-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w19, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w20, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w21, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w22, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w23, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w24, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w25, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s1, w22
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: mov v0.s[1], w25
+; CHECK-i32-NEXT: mov v1.s[1], w21
+; CHECK-i32-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload
+; CHECK-i32-NEXT: ldp x30, x25, [sp, #112] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[2], w24
+; CHECK-i32-NEXT: mov v1.s[2], w20
+; CHECK-i32-NEXT: mov v0.s[3], w23
+; CHECK-i32-NEXT: mov v1.s[3], w19
+; CHECK-i32-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload
+; CHECK-i32-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload
+; CHECK-i32-NEXT: add sp, sp, #176
+; CHECK-i32-NEXT: ret
+;
+; CHECK-i64-LABEL: lrint_v8fp128:
+; CHECK-i64: // %bb.0:
+; CHECK-i64-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-i64-NEXT: sub sp, sp, #128
+; CHECK-i64-NEXT: addvl sp, sp, #-2
+; CHECK-i64-NEXT: str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-i64-NEXT: mov v0.16b, v7.16b
+; CHECK-i64-NEXT: stp q6, q5, [sp, #16] // 32-byte Folded Spill
+; CHECK-i64-NEXT: stp q4, q3, [sp, #48] // 32-byte Folded Spill
+; CHECK-i64-NEXT: stp q2, q1, [sp, #80] // 32-byte Folded Spill
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #128
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #128
+; CHECK-i64-NEXT: ptrue p0.d, vl2
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #128
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #128
+; CHECK-i64-NEXT: ptrue p0.d, vl2
+; CHECK-i64-NEXT: ldr z2, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov z3.d, z2.d
+; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT: ext z3.b, z3.b, z2.b, #16
+; CHECK-i64-NEXT: // kill: def $q2 killed $q2 killed $z2
+; CHECK-i64-NEXT: // kill: def $q3 killed $q3 killed $z3
+; CHECK-i64-NEXT: mov z1.d, z0.d
+; CHECK-i64-NEXT: ext z1.b, z1.b, z0.b, #16
+; CHECK-i64-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-i64-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-i64-NEXT: addvl sp, sp, #2
+; CHECK-i64-NEXT: add sp, sp, #128
+; CHECK-i64-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-i64-NEXT: ret
+ %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x)
+ ret <8 x iXLen> %a
+}
+declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>)
+
+define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) nounwind {
+; CHECK-i32-LABEL: lrint_v16fp128:
+; CHECK-i32: // %bb.0:
+; CHECK-i32-NEXT: sub sp, sp, #368
+; CHECK-i32-NEXT: stp q3, q0, [sp, #144] // 32-byte Folded Spill
+; CHECK-i32-NEXT: stp q2, q1, [sp, #176] // 32-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #368]
+; CHECK-i32-NEXT: stp x29, x30, [sp, #272] // 16-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #384]
+; CHECK-i32-NEXT: stp x28, x27, [sp, #288] // 16-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #400]
+; CHECK-i32-NEXT: stp x26, x25, [sp, #304] // 16-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #416]
+; CHECK-i32-NEXT: stp x24, x23, [sp, #320] // 16-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #208] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #432]
+; CHECK-i32-NEXT: stp x22, x21, [sp, #336] // 16-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #448]
+; CHECK-i32-NEXT: stp x20, x19, [sp, #352] // 16-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #224] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #464]
+; CHECK-i32-NEXT: stp q7, q6, [sp, #80] // 32-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #240] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #480]
+; CHECK-i32-NEXT: stp q5, q4, [sp, #112] // 32-byte Folded Spill
+; CHECK-i32-NEXT: mov v0.16b, v1.16b
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #268] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #240] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #224] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w23, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #208] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w24, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w25, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w27, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w26, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w28, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w29, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w19, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w20, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w21, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w22, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s1, w19
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: ldr w8, [sp, #224] // 4-byte Folded Reload
+; CHECK-i32-NEXT: fmov s2, w27
+; CHECK-i32-NEXT: fmov s3, w23
+; CHECK-i32-NEXT: mov v0.s[1], w22
+; CHECK-i32-NEXT: mov v1.s[1], w29
+; CHECK-i32-NEXT: mov v2.s[1], w25
+; CHECK-i32-NEXT: mov v3.s[1], w8
+; CHECK-i32-NEXT: ldr w8, [sp, #240] // 4-byte Folded Reload
+; CHECK-i32-NEXT: ldp x29, x30, [sp, #272] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[2], w21
+; CHECK-i32-NEXT: mov v1.s[2], w28
+; CHECK-i32-NEXT: mov v2.s[2], w24
+; CHECK-i32-NEXT: mov v3.s[2], w8
+; CHECK-i32-NEXT: ldr w8, [sp, #208] // 4-byte Folded Reload
+; CHECK-i32-NEXT: ldp x22, x21, [sp, #336] // 16-byte Folded Reload
+; CHECK-i32-NEXT: ldp x24, x23, [sp, #320] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[3], w20
+; CHECK-i32-NEXT: mov v1.s[3], w26
+; CHECK-i32-NEXT: mov v2.s[3], w8
+; CHECK-i32-NEXT: ldr w8, [sp, #268] // 4-byte Folded Reload
+; CHECK-i32-NEXT: ldp x20, x19, [sp, #352] // 16-byte Folded Reload
+; CHECK-i32-NEXT: ldp x26, x25, [sp, #304] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v3.s[3], w8
+; CHECK-i32-NEXT: ldp x28, x27, [sp, #288] // 16-byte Folded Reload
+; CHECK-i32-NEXT: add sp, sp, #368
+; CHECK-i32-NEXT: ret
+;
+; CHECK-i64-LABEL: lrint_v16fp128:
+; CHECK-i64: // %bb.0:
+; CHECK-i64-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-i64-NEXT: sub sp, sp, #256
+; CHECK-i64-NEXT: addvl sp, sp, #-4
+; CHECK-i64-NEXT: addvl x8, sp, #4
+; CHECK-i64-NEXT: str q1, [sp, #240] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q1, [x8, #272]
+; CHECK-i64-NEXT: addvl x8, sp, #4
+; CHECK-i64-NEXT: str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i64-NEXT: stp q7, q6, [sp, #128] // 32-byte Folded Spill
+; CHECK-i64-NEXT: str q1, [sp, #112] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q1, [x8, #288]
+; CHECK-i64-NEXT: addvl x8, sp, #4
+; CHECK-i64-NEXT: stp q5, q4, [sp, #160] // 32-byte Folded Spill
+; CHECK-i64-NEXT: str q1, [sp, #96] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q1, [x8, #304]
+; CHECK-i64-NEXT: addvl x8, sp, #4
+; CHECK-i64-NEXT: stp q3, q2, [sp, #192] // 32-byte Folded Spill
+; CHECK-i64-NEXT: str q1, [sp, #80] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q1, [x8, #320]
+; CHECK-i64-NEXT: addvl x8, sp, #4
+; CHECK-i64-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q1, [x8, #336]
+; CHECK-i64-NEXT: addvl x8, sp, #4
+; CHECK-i64-NEXT: str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q1, [x8, #352]
+; CHECK-i64-NEXT: addvl x8, sp, #4
+; CHECK-i64-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q1, [x8, #368]
+; CHECK-i64-NEXT: addvl x8, sp, #4
+; CHECK-i64-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q1, [x8, #384]
+; CHECK-i64-NEXT: mov v0.16b, v1.16b
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #256
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #256
+; CHECK-i64-NEXT: ptrue p0.d, vl2
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: ldr z1, [x8, #3, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #256
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #256
+; CHECK-i64-NEXT: ptrue p0.d, vl2
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: ldr z1, [x8, #2, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #256
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #160] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #256
+; CHECK-i64-NEXT: ptrue p0.d, vl2
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #256
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #240] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #256
+; CHECK-i64-NEXT: ptrue p0.d, vl2
+; CHECK-i64-NEXT: ldr z2, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: ldr z4, [x8, #2, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: ldr z6, [x8, #3, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov z3.d, z2.d
+; CHECK-i64-NEXT: mov z5.d, z4.d
+; CHECK-i64-NEXT: mov z7.d, z6.d
+; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT: ext z3.b, z3.b, z2.b, #16
+; CHECK-i64-NEXT: ext z5.b, z5.b, z4.b, #16
+; CHECK-i64-NEXT: ext z7.b, z7.b, z6.b, #16
+; CHECK-i64-NEXT: // kill: def $q2 killed $q2 killed $z2
+; CHECK-i64-NEXT: // kill: def $q4 killed $q4 killed $z4
+; CHECK-i64-NEXT: // kill: def $q3 killed $q3 killed $z3
+; CHECK-i64-NEXT: // kill: def $q5 killed $q5 killed $z5
+; CHECK-i64-NEXT: // kill: def $q6 killed $q6 killed $z6
+; CHECK-i64-NEXT: // kill: def $q7 killed $q7 killed $z7
+; CHECK-i64-NEXT: mov z1.d, z0.d
+; CHECK-i64-NEXT: ext z1.b, z1.b, z0.b, #16
+; CHECK-i64-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-i64-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-i64-NEXT: addvl sp, sp, #4
+; CHECK-i64-NEXT: add sp, sp, #256
+; CHECK-i64-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-i64-NEXT: ret
+ %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128> %x)
+ ret <16 x iXLen> %a
+}
+declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>)
+
+define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) nounwind {
+; CHECK-i32-LABEL: lrint_v32fp128:
+; CHECK-i32: // %bb.0:
+; CHECK-i32-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-i32-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
+; CHECK-i32-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
+; CHECK-i32-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
+; CHECK-i32-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-i32-NEXT: sub sp, sp, #528
+; CHECK-i32-NEXT: stp q2, q1, [sp, #368] // 32-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #624]
+; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #160] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #640]
+; CHECK-i32-NEXT: str q7, [sp, #208] // 16-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #128] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #656]
+; CHECK-i32-NEXT: str q6, [sp, #240] // 16-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #96] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #672]
+; CHECK-i32-NEXT: str q5, [sp, #272] // 16-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #80] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #688]
+; CHECK-i32-NEXT: str q4, [sp, #304] // 16-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #704]
+; CHECK-i32-NEXT: str q3, [sp, #336] // 16-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #720]
+; CHECK-i32-NEXT: str q1, [sp, #112] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #736]
+; CHECK-i32-NEXT: str q1, [sp, #176] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #752]
+; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #768]
+; CHECK-i32-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #784]
+; CHECK-i32-NEXT: str q1, [sp, #192] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #800]
+; CHECK-i32-NEXT: str q1, [sp, #288] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #816]
+; CHECK-i32-NEXT: str q1, [sp, #144] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #832]
+; CHECK-i32-NEXT: str q1, [sp, #256] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #848]
+; CHECK-i32-NEXT: str q1, [sp, #352] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #864]
+; CHECK-i32-NEXT: str q1, [sp, #416] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #880]
+; CHECK-i32-NEXT: str q1, [sp, #320] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #896]
+; CHECK-i32-NEXT: str q1, [sp, #400] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #912]
+; CHECK-i32-NEXT: str q1, [sp, #448] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #928]
+; CHECK-i32-NEXT: str q1, [sp, #480] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #944]
+; CHECK-i32-NEXT: str q1, [sp, #432] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #960]
+; CHECK-i32-NEXT: str q1, [sp, #464] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #976]
+; CHECK-i32-NEXT: str q1, [sp, #496] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #992]
+; CHECK-i32-NEXT: mov v0.16b, v1.16b
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #524] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #496] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #464] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #432] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #480] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #448] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #400] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #320] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #416] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #256] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #352] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #256] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #288] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #144] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #288] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #192] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #64] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #16] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #176] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #112] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w29, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w21, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #80] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-i32-NEXT: str w0, [sp, #96] // 4-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w23, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w24, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w28, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w19, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #304] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w25, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #336] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w26, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w20, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w22, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov w27, w0
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr s4, [sp, #16] // 4-byte Folded Reload
+; CHECK-i32-NEXT: ldr w8, [sp, #64] // 4-byte Folded Reload
+; CHECK-i32-NEXT: fmov s2, w24
+; CHECK-i32-NEXT: ldr s5, [sp, #144] // 4-byte Folded Reload
+; CHECK-i32-NEXT: ldr s6, [sp, #320] // 4-byte Folded Reload
+; CHECK-i32-NEXT: fmov s3, w21
+; CHECK-i32-NEXT: mov v4.s[1], w8
+; CHECK-i32-NEXT: ldr w8, [sp, #256] // 4-byte Folded Reload
+; CHECK-i32-NEXT: ldr s7, [sp, #432] // 4-byte Folded Reload
+; CHECK-i32-NEXT: mov v2.s[1], w23
+; CHECK-i32-NEXT: fmov s1, w26
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: mov v5.s[1], w8
+; CHECK-i32-NEXT: ldr w8, [sp, #400] // 4-byte Folded Reload
+; CHECK-i32-NEXT: mov v3.s[1], w29
+; CHECK-i32-NEXT: mov v6.s[1], w8
+; CHECK-i32-NEXT: ldr w8, [sp, #464] // 4-byte Folded Reload
+; CHECK-i32-NEXT: mov v1.s[1], w25
+; CHECK-i32-NEXT: mov v0.s[1], w27
+; CHECK-i32-NEXT: mov v7.s[1], w8
+; CHECK-i32-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload
+; CHECK-i32-NEXT: mov v2.s[2], w8
+; CHECK-i32-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload
+; CHECK-i32-NEXT: mov v1.s[2], w19
+; CHECK-i32-NEXT: mov v0.s[2], w22
+; CHECK-i32-NEXT: mov v3.s[2], w8
+; CHECK-i32-NEXT: ldr w8, [sp, #192] // 4-byte Folded Reload
+; CHECK-i32-NEXT: mov v4.s[2], w8
+; CHECK-i32-NEXT: ldr w8, [sp, #352] // 4-byte Folded Reload
+; CHECK-i32-NEXT: mov v1.s[3], w28
+; CHECK-i32-NEXT: mov v0.s[3], w20
+; CHECK-i32-NEXT: mov v5.s[2], w8
+; CHECK-i32-NEXT: ldr w8, [sp, #448] // 4-byte Folded Reload
+; CHECK-i32-NEXT: mov v6.s[2], w8
+; CHECK-i32-NEXT: ldr w8, [sp, #496] // 4-byte Folded Reload
+; CHECK-i32-NEXT: mov v7.s[2], w8
+; CHECK-i32-NEXT: ldr w8, [sp, #80] // 4-byte Folded Reload
+; CHECK-i32-NEXT: mov v2.s[3], w8
+; CHECK-i32-NEXT: ldr w8, [sp, #176] // 4-byte Folded Reload
+; CHECK-i32-NEXT: mov v3.s[3], w8
+; CHECK-i32-NEXT: ldr w8, [sp, #288] // 4-byte Folded Reload
+; CHECK-i32-NEXT: mov v4.s[3], w8
+; CHECK-i32-NEXT: ldr w8, [sp, #416] // 4-byte Folded Reload
+; CHECK-i32-NEXT: mov v5.s[3], w8
+; CHECK-i32-NEXT: ldr w8, [sp, #480] // 4-byte Folded Reload
+; CHECK-i32-NEXT: mov v6.s[3], w8
+; CHECK-i32-NEXT: ldr w8, [sp, #524] // 4-byte Folded Reload
+; CHECK-i32-NEXT: mov v7.s[3], w8
+; CHECK-i32-NEXT: add sp, sp, #528
+; CHECK-i32-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-i32-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
+; CHECK-i32-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
+; CHECK-i32-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
+; CHECK-i32-NEXT: ret
+;
+; CHECK-i64-LABEL: lrint_v32fp128:
+; CHECK-i64: // %bb.0:
+; CHECK-i64-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-i64-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT: sub sp, sp, #512
+; CHECK-i64-NEXT: addvl sp, sp, #-8
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill
+; CHECK-i64-NEXT: mov x19, x8
+; CHECK-i64-NEXT: stp q0, q7, [sp, #48] // 32-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #864]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q6, [sp, #96] // 16-byte Folded Spill
+; CHECK-i64-NEXT: str q0, [sp, #496] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #880]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: stp q5, q4, [sp, #128] // 32-byte Folded Spill
+; CHECK-i64-NEXT: str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #896]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #912]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #800]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #432] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #816]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #832]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #848]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #384] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #736]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #368] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #752]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #352] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #768]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #336] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #784]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #320] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #672]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #304] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #688]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #288] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #704]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #272] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #720]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #256] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #608]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #624]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #640]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #656]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #544]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #176] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #560]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #576]
+; CHECK-i64-NEXT: addvl x9, sp, #8
+; CHECK-i64-NEXT: str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [x9, #592]
+; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-i64-NEXT: mov v0.16b, v3.16b
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #512
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str z0, [x8, #7, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #512
+; CHECK-i64-NEXT: ptrue p0.d, vl2
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: ldr z1, [x8, #7, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT: str z0, [x8, #7, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #512
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str z0, [x8, #6, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #512
+; CHECK-i64-NEXT: ptrue p0.d, vl2
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: ldr z1, [x8, #6, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT: str z0, [x8, #6, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #144] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #144] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #512
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str z0, [x8, #5, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #160] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #512
+; CHECK-i64-NEXT: ptrue p0.d, vl2
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: ldr z1, [x8, #5, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT: str z0, [x8, #5, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #512
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str z0, [x8, #4, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #224] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #512
+; CHECK-i64-NEXT: ptrue p0.d, vl2
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: ldr z1, [x8, #4, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT: str z0, [x8, #4, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #256] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #256] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #256] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #512
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #288] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #288] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #304] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #288] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #512
+; CHECK-i64-NEXT: ptrue p0.d, vl2
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: ldr z1, [x8, #3, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #320] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #336] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #320] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #512
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #352] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #352] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #512
+; CHECK-i64-NEXT: ptrue p0.d, vl2
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: ldr z1, [x8, #2, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #384] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #384] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #512
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #416] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #512
+; CHECK-i64-NEXT: ptrue p0.d, vl2
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #448] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #512
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #480] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add x8, sp, #512
+; CHECK-i64-NEXT: ptrue p0.d, vl2
+; CHECK-i64-NEXT: add x9, sp, #512
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov x8, #28 // =0x1c
+; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT: ptrue p0.d, vl4
+; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-i64-NEXT: mov x8, #24 // =0x18
+; CHECK-i64-NEXT: ldr z0, [x9, #1, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-i64-NEXT: mov x8, #20 // =0x14
+; CHECK-i64-NEXT: ldr z0, [x9, #2, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-i64-NEXT: mov x8, #16 // =0x10
+; CHECK-i64-NEXT: ldr z0, [x9, #3, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-i64-NEXT: mov x8, #12 // =0xc
+; CHECK-i64-NEXT: ldr z0, [x9, #4, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-i64-NEXT: mov x8, #8 // =0x8
+; CHECK-i64-NEXT: ldr z0, [x9, #5, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-i64-NEXT: mov x8, #4 // =0x4
+; CHECK-i64-NEXT: ldr z0, [x9, #6, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-i64-NEXT: add x8, sp, #512
+; CHECK-i64-NEXT: ldr z0, [x8, #7, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19]
+; CHECK-i64-NEXT: addvl sp, sp, #8
+; CHECK-i64-NEXT: add sp, sp, #512
+; CHECK-i64-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload
+; CHECK-i64-NEXT: ret
+  %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32fp128(<32 x fp128> %x)
+ ret <32 x iXLen> %a
+}
+declare <32 x iXLen> @llvm.lrint.v32iXLen.v32fp128(<32 x fp128>)
diff --git a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll
index 5503de2b4c5db..8f139cc225a67 100644
--- a/llvm/test/CodeGen/AArch64/vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
-define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
+define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind {
; CHECK-LABEL: llrint_v1i64_v1f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvt s0, h0
@@ -14,7 +14,7 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
}
declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>)
-define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
+define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind {
; CHECK-LABEL: llrint_v1i64_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
@@ -33,7 +33,7 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
}
declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>)
-define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
+define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind {
; CHECK-LABEL: llrint_v4i64_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
@@ -62,7 +62,7 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
}
declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>)
-define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
+define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind {
; CHECK-LABEL: llrint_v8i64_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
@@ -110,7 +110,7 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
}
declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>)
-define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
+define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind {
; CHECK-LABEL: llrint_v16i64_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
@@ -197,7 +197,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
}
declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>)
-define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
+define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
; CHECK-LABEL: llrint_v32i64_v32f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8
@@ -370,7 +370,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
}
declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>)
-define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
+define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) nounwind {
; CHECK-LABEL: llrint_v1i64_v1f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
@@ -383,7 +383,7 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
}
declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
-define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
+define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: llrint_v2i64_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.2s, v0.2s
@@ -395,7 +395,7 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
}
declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
-define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
+define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) nounwind {
; CHECK-LABEL: llrint_v4i64_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
@@ -411,7 +411,7 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
}
declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
-define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
+define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) nounwind {
; CHECK-LABEL: llrint_v8i64_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
@@ -434,7 +434,7 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
}
declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
-define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
+define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) nounwind {
; CHECK-LABEL: llrint_v16i64_v16f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8
@@ -471,7 +471,7 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
}
declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
-define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) {
+define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) nounwind {
; CHECK-LABEL: llrint_v32i64_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v16.16b, v7.16b, v7.16b, #8
@@ -544,7 +544,7 @@ define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) {
}
declare <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float>)
-define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
+define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) nounwind {
; CHECK-LABEL: llrint_v1i64_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx d0, d0
@@ -556,7 +556,7 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
}
declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>)
-define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
+define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) nounwind {
; CHECK-LABEL: llrint_v2i64_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.2d, v0.2d
@@ -567,7 +567,7 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
}
declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)
-define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
+define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) nounwind {
; CHECK-LABEL: llrint_v4i64_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.2d, v0.2d
@@ -580,7 +580,7 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
}
declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
-define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
+define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) nounwind {
; CHECK-LABEL: llrint_v8i64_v8f64:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.2d, v0.2d
@@ -597,7 +597,7 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
}
declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
-define <16 x i64> @llrint_v16f64(<16 x double> %x) {
+define <16 x i64> @llrint_v16f64(<16 x double> %x) nounwind {
; CHECK-LABEL: llrint_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.2d, v0.2d
@@ -622,7 +622,7 @@ define <16 x i64> @llrint_v16f64(<16 x double> %x) {
}
declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>)
-define <32 x i64> @llrint_v32f64(<32 x double> %x) {
+define <32 x i64> @llrint_v32f64(<32 x double> %x) nounwind {
; CHECK-LABEL: llrint_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q17, q16, [sp, #96]
@@ -674,3 +674,504 @@ define <32 x i64> @llrint_v32f64(<32 x double> %x) {
ret <32 x i64> %a
}
declare <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double>)
+
+define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) nounwind {
+; CHECK-LABEL: llrint_v1i64_v1f128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %a = call <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128> %x)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128>)
+
+define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) nounwind {
+; CHECK-LABEL: llrint_v2i64_v2f128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+ %a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>)
+
+define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) nounwind {
+; CHECK-LABEL: llrint_v4i64_v4f128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp q3, q2, [sp, #32] // 32-byte Folded Spill
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d1, x0
+; CHECK-NEXT: ldp q0, q4, [sp, #16] // 32-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: mov v1.d[1], v4.d[0]
+; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: ret
+ %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>)
+
+define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) nounwind {
+; CHECK-LABEL: llrint_v8i64_v8f128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #144
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: str x30, [sp, #128] // 8-byte Folded Spill
+; CHECK-NEXT: stp q3, q2, [sp, #16] // 32-byte Folded Spill
+; CHECK-NEXT: stp q5, q4, [sp, #48] // 32-byte Folded Spill
+; CHECK-NEXT: stp q7, q6, [sp, #96] // 32-byte Folded Spill
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d3, x0
+; CHECK-NEXT: ldp q0, q1, [sp, #80] // 32-byte Folded Reload
+; CHECK-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload
+; CHECK-NEXT: mov v3.d[1], v1.d[0]
+; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #144
+; CHECK-NEXT: ret
+ %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128>)
+
+define <16 x i64> @llrint_v16f128(<16 x fp128> %x) nounwind {
+; CHECK-LABEL: llrint_v16f128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #272
+; CHECK-NEXT: str q2, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q2, [sp, #368]
+; CHECK-NEXT: stp q0, q3, [sp] // 32-byte Folded Spill
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: str q2, [sp, #240] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q2, [sp, #384]
+; CHECK-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill
+; CHECK-NEXT: str q2, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q2, [sp, #336]
+; CHECK-NEXT: stp q5, q7, [sp, #32] // 32-byte Folded Spill
+; CHECK-NEXT: str q2, [sp, #192] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q2, [sp, #352]
+; CHECK-NEXT: str q2, [sp, #176] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q2, [sp, #304]
+; CHECK-NEXT: str q2, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q2, [sp, #320]
+; CHECK-NEXT: stp q4, q2, [sp, #112] // 32-byte Folded Spill
+; CHECK-NEXT: ldr q2, [sp, #272]
+; CHECK-NEXT: stp q6, q2, [sp, #80] // 32-byte Folded Spill
+; CHECK-NEXT: ldr q2, [sp, #288]
+; CHECK-NEXT: str q2, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #208] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #176] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d7, x0
+; CHECK-NEXT: ldp q0, q1, [sp, #208] // 32-byte Folded Reload
+; CHECK-NEXT: ldp q4, q2, [sp, #96] // 32-byte Folded Reload
+; CHECK-NEXT: ldr q3, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload
+; CHECK-NEXT: ldr q6, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT: mov v7.d[1], v1.d[0]
+; CHECK-NEXT: ldp q5, q1, [sp, #144] // 32-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #272
+; CHECK-NEXT: ret
+ %a = call <16 x i64> @llvm.llrint.v16i64.v16f128(<16 x fp128> %x)
+ ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.llrint.v16i64.v16f128(<16 x fp128>)
+
+define <32 x i64> @llrint_v32f128(<32 x fp128> %x) nounwind {
+; CHECK-LABEL: llrint_v32f128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #512
+; CHECK-NEXT: str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #896]
+; CHECK-NEXT: mov x19, x8
+; CHECK-NEXT: str q7, [sp, #272] // 16-byte Folded Spill
+; CHECK-NEXT: str q0, [sp, #496] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #912]
+; CHECK-NEXT: str q6, [sp, #320] // 16-byte Folded Spill
+; CHECK-NEXT: str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #864]
+; CHECK-NEXT: stp q3, q5, [sp, #16] // 32-byte Folded Spill
+; CHECK-NEXT: str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #880]
+; CHECK-NEXT: stp q2, q0, [sp, #416] // 32-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #832]
+; CHECK-NEXT: str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #848]
+; CHECK-NEXT: stp q4, q0, [sp, #368] // 32-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #800]
+; CHECK-NEXT: str q0, [sp, #352] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #816]
+; CHECK-NEXT: str q0, [sp, #336] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #768]
+; CHECK-NEXT: str q0, [sp, #304] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #784]
+; CHECK-NEXT: str q0, [sp, #288] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #736]
+; CHECK-NEXT: str q0, [sp, #256] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #752]
+; CHECK-NEXT: str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #704]
+; CHECK-NEXT: str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #720]
+; CHECK-NEXT: str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #672]
+; CHECK-NEXT: str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #688]
+; CHECK-NEXT: str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #640]
+; CHECK-NEXT: str q0, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #656]
+; CHECK-NEXT: str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #608]
+; CHECK-NEXT: str q0, [sp, #176] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #624]
+; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #576]
+; CHECK-NEXT: str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #592]
+; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #544]
+; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #560]
+; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #368] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #272] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #272] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #320] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #272] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #272] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #272] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #176] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #256] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #240] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #256] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #288] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #288] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #304] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #288] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #304] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #336] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #336] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #336] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #352] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #384] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #384] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #432] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #432] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-NEXT: str q0, [x19, #208]
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #480] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr q1, [sp, #352] // 16-byte Folded Reload
+; CHECK-NEXT: str q1, [x19, #192]
+; CHECK-NEXT: ldr q1, [sp, #304] // 16-byte Folded Reload
+; CHECK-NEXT: str q1, [x19, #176]
+; CHECK-NEXT: ldr q1, [sp, #256] // 16-byte Folded Reload
+; CHECK-NEXT: str q0, [x19, #240]
+; CHECK-NEXT: str q1, [x19, #160]
+; CHECK-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-NEXT: str q0, [x19, #224]
+; CHECK-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-NEXT: str q0, [x19, #144]
+; CHECK-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT: str q0, [x19, #128]
+; CHECK-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT: str q0, [x19, #112]
+; CHECK-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEXT: str q0, [x19, #96]
+; CHECK-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-NEXT: str q0, [x19, #80]
+; CHECK-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload
+; CHECK-NEXT: str q0, [x19, #64]
+; CHECK-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload
+; CHECK-NEXT: str q0, [x19, #48]
+; CHECK-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload
+; CHECK-NEXT: str q0, [x19, #32]
+; CHECK-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-NEXT: str q0, [x19, #16]
+; CHECK-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-NEXT: str q0, [x19]
+; CHECK-NEXT: add sp, sp, #512
+; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+  %a = call <32 x i64> @llvm.llrint.v32i64.v32f128(<32 x fp128> %x)
+ ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.llrint.v32i64.v32f128(<32 x fp128>)
diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll
index 602643264e7be..b899db839a65a 100644
--- a/llvm/test/CodeGen/AArch64/vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll
@@ -45,7 +45,7 @@
; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16f64
; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32f64
-define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
+define <1 x iXLen> @lrint_v1f16(<1 x half> %x) nounwind {
; CHECK-i32-LABEL: lrint_v1f16:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: fcvt s0, h0
@@ -66,7 +66,7 @@ define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
}
declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>)
-define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
+define <2 x iXLen> @lrint_v2f16(<2 x half> %x) nounwind {
; CHECK-i32-LABEL: lrint_v2f16:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: // kill: def $d0 killed $d0 def $q0
@@ -100,7 +100,7 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
}
declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>)
-define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
+define <4 x iXLen> @lrint_v4f16(<4 x half> %x) nounwind {
; CHECK-i32-LABEL: lrint_v4f16:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: // kill: def $d0 killed $d0 def $q0
@@ -153,7 +153,7 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
}
declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>)
-define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
+define <8 x iXLen> @lrint_v8f16(<8 x half> %x) nounwind {
; CHECK-i32-LABEL: lrint_v8f16:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: ext v1.16b, v0.16b, v0.16b, #8
@@ -244,7 +244,7 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
}
declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>)
-define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
+define <16 x iXLen> @lrint_v16f16(<16 x half> %x) nounwind {
; CHECK-i32-LABEL: lrint_v16f16:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: ext v2.16b, v0.16b, v0.16b, #8
@@ -413,7 +413,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
}
declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>)
-define <32 x iXLen> @lrint_v32f16(<32 x half> %x) {
+define <32 x iXLen> @lrint_v32f16(<32 x half> %x) nounwind {
; CHECK-i32-LABEL: lrint_v32f16:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: ext v5.16b, v0.16b, v0.16b, #8
@@ -748,7 +748,7 @@ define <32 x iXLen> @lrint_v32f16(<32 x half> %x) {
}
declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half>)
-define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
+define <1 x iXLen> @lrint_v1f32(<1 x float> %x) nounwind {
; CHECK-i32-LABEL: lrint_v1f32:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx v0.2s, v0.2s
@@ -774,7 +774,7 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
}
declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>)
-define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
+define <2 x iXLen> @lrint_v2f32(<2 x float> %x) nounwind {
; CHECK-i32-LABEL: lrint_v2f32:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx v0.2s, v0.2s
@@ -792,7 +792,7 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
}
declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>)
-define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
+define <4 x iXLen> @lrint_v4f32(<4 x float> %x) nounwind {
; CHECK-i32-LABEL: lrint_v4f32:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx v0.4s, v0.4s
@@ -814,7 +814,7 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
}
declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>)
-define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
+define <8 x iXLen> @lrint_v8f32(<8 x float> %x) nounwind {
; CHECK-i32-LABEL: lrint_v8f32:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx v0.4s, v0.4s
@@ -845,7 +845,7 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
}
declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>)
-define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
+define <16 x iXLen> @lrint_v16f32(<16 x float> %x) nounwind {
; CHECK-i32-LABEL: lrint_v16f32:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx v0.4s, v0.4s
@@ -894,7 +894,7 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
}
declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)
-define <32 x iXLen> @lrint_v32f32(<32 x float> %x) {
+define <32 x iXLen> @lrint_v32f32(<32 x float> %x) nounwind {
; CHECK-i32-LABEL: lrint_v32f32:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx v0.4s, v0.4s
@@ -987,7 +987,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x float> %x) {
}
declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float>)
-define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
+define <1 x iXLen> @lrint_v1f64(<1 x double> %x) nounwind {
; CHECK-i32-LABEL: lrint_v1f64:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx d0, d0
@@ -1006,7 +1006,7 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
}
declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
-define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
+define <2 x iXLen> @lrint_v2f64(<2 x double> %x) nounwind {
; CHECK-i32-LABEL: lrint_v2f64:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx v0.2d, v0.2d
@@ -1028,7 +1028,7 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
}
declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
-define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
+define <4 x iXLen> @lrint_v4f64(<4 x double> %x) nounwind {
; CHECK-i32-LABEL: lrint_v4f64:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx v0.2d, v0.2d
@@ -1057,7 +1057,7 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
}
declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>)
-define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
+define <8 x iXLen> @lrint_v8f64(<8 x double> %x) nounwind {
; CHECK-i32-LABEL: lrint_v8f64:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx v2.2d, v2.2d
@@ -1102,7 +1102,7 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
}
declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)
-define <16 x iXLen> @lrint_v16f64(<16 x double> %x) {
+define <16 x iXLen> @lrint_v16f64(<16 x double> %x) nounwind {
; CHECK-i32-LABEL: lrint_v16f64:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx v0.2d, v0.2d
@@ -1179,7 +1179,7 @@ define <16 x iXLen> @lrint_v16f64(<16 x double> %x) {
}
declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double>)
-define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
+define <32 x iXLen> @lrint_v32f64(<32 x double> %x) nounwind {
; CHECK-i32-LABEL: lrint_v32f64:
; CHECK-i32: // %bb.0:
; CHECK-i32-NEXT: frintx v17.2d, v0.2d
@@ -1335,3 +1335,922 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
ret <32 x iXLen> %a
}
declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>)
+
+define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) nounwind {
+; CHECK-i32-LABEL: lrint_v1fp128:
+; CHECK-i32: // %bb.0:
+; CHECK-i32-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-i32-NEXT: ret
+;
+; CHECK-i64-LABEL: lrint_v1fp128:
+; CHECK-i64: // %bb.0:
+; CHECK-i64-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-i64-NEXT: ret
+ %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128> %x)
+ ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>)
+
+define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) nounwind {
+; CHECK-i32-LABEL: lrint_v2fp128:
+; CHECK-i32: // %bb.0:
+; CHECK-i32-NEXT: sub sp, sp, #48
+; CHECK-i32-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i32-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[1], w0
+; CHECK-i32-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-i32-NEXT: add sp, sp, #48
+; CHECK-i32-NEXT: ret
+;
+; CHECK-i64-LABEL: lrint_v2fp128:
+; CHECK-i64: // %bb.0:
+; CHECK-i64-NEXT: sub sp, sp, #48
+; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-i64-NEXT: mov v0.16b, v1.16b
+; CHECK-i64-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: add sp, sp, #48
+; CHECK-i64-NEXT: ret
+ %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128> %x)
+ ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>)
+
+define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) nounwind {
+; CHECK-i32-LABEL: lrint_v4fp128:
+; CHECK-i32: // %bb.0:
+; CHECK-i32-NEXT: sub sp, sp, #80
+; CHECK-i32-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-i32-NEXT: stp q2, q3, [sp, #16] // 32-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[1], w0
+; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[2], w0
+; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[3], w0
+; CHECK-i32-NEXT: add sp, sp, #80
+; CHECK-i32-NEXT: ret
+;
+; CHECK-i64-LABEL: lrint_v4fp128:
+; CHECK-i64: // %bb.0:
+; CHECK-i64-NEXT: sub sp, sp, #80
+; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-i64-NEXT: mov v0.16b, v1.16b
+; CHECK-i64-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-i64-NEXT: stp q3, q2, [sp, #32] // 32-byte Folded Spill
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d1, x0
+; CHECK-i64-NEXT: ldp q0, q4, [sp, #16] // 32-byte Folded Reload
+; CHECK-i64-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-i64-NEXT: mov v1.d[1], v4.d[0]
+; CHECK-i64-NEXT: add sp, sp, #80
+; CHECK-i64-NEXT: ret
+ %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> %x)
+ ret <4 x iXLen> %a
+}
+declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>)
+
+define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) nounwind {
+; CHECK-i32-LABEL: lrint_v8fp128:
+; CHECK-i32: // %bb.0:
+; CHECK-i32-NEXT: sub sp, sp, #144
+; CHECK-i32-NEXT: str x30, [sp, #128] // 8-byte Folded Spill
+; CHECK-i32-NEXT: str q4, [sp, #96] // 16-byte Folded Spill
+; CHECK-i32-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill
+; CHECK-i32-NEXT: stp q3, q5, [sp, #32] // 32-byte Folded Spill
+; CHECK-i32-NEXT: stp q6, q7, [sp, #64] // 32-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[1], w0
+; CHECK-i32-NEXT: str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[2], w0
+; CHECK-i32-NEXT: str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[3], w0
+; CHECK-i32-NEXT: str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[1], w0
+; CHECK-i32-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[2], w0
+; CHECK-i32-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldp q1, q0, [sp, #96] // 32-byte Folded Reload
+; CHECK-i32-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload
+; CHECK-i32-NEXT: mov v1.s[3], w0
+; CHECK-i32-NEXT: add sp, sp, #144
+; CHECK-i32-NEXT: ret
+;
+; CHECK-i64-LABEL: lrint_v8fp128:
+; CHECK-i64: // %bb.0:
+; CHECK-i64-NEXT: sub sp, sp, #144
+; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-i64-NEXT: mov v0.16b, v1.16b
+; CHECK-i64-NEXT: str x30, [sp, #128] // 8-byte Folded Spill
+; CHECK-i64-NEXT: stp q3, q2, [sp, #16] // 32-byte Folded Spill
+; CHECK-i64-NEXT: stp q5, q4, [sp, #48] // 32-byte Folded Spill
+; CHECK-i64-NEXT: stp q7, q6, [sp, #96] // 32-byte Folded Spill
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d3, x0
+; CHECK-i64-NEXT: ldp q0, q1, [sp, #80] // 32-byte Folded Reload
+; CHECK-i64-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload
+; CHECK-i64-NEXT: mov v3.d[1], v1.d[0]
+; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT: add sp, sp, #144
+; CHECK-i64-NEXT: ret
+ %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x)
+ ret <8 x iXLen> %a
+}
+declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>)
+
+define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) nounwind {
+; CHECK-i32-LABEL: lrint_v16fp128:
+; CHECK-i32: // %bb.0:
+; CHECK-i32-NEXT: sub sp, sp, #272
+; CHECK-i32-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #384]
+; CHECK-i32-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #176] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #368]
+; CHECK-i32-NEXT: stp q3, q5, [sp, #32] // 32-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #160] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #352]
+; CHECK-i32-NEXT: stp q7, q4, [sp, #208] // 32-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #144] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #336]
+; CHECK-i32-NEXT: str q1, [sp, #192] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #320]
+; CHECK-i32-NEXT: str q1, [sp, #128] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #304]
+; CHECK-i32-NEXT: str q1, [sp, #112] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #288]
+; CHECK-i32-NEXT: stp q6, q1, [sp, #80] // 32-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #272]
+; CHECK-i32-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[1], w0
+; CHECK-i32-NEXT: str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[2], w0
+; CHECK-i32-NEXT: str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[3], w0
+; CHECK-i32-NEXT: str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[1], w0
+; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[2], w0
+; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[3], w0
+; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[1], w0
+; CHECK-i32-NEXT: str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[2], w0
+; CHECK-i32-NEXT: str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[3], w0
+; CHECK-i32-NEXT: str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[1], w0
+; CHECK-i32-NEXT: str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[2], w0
+; CHECK-i32-NEXT: str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldp q3, q2, [sp, #192] // 32-byte Folded Reload
+; CHECK-i32-NEXT: ldp q1, q0, [sp, #224] // 32-byte Folded Reload
+; CHECK-i32-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v3.s[3], w0
+; CHECK-i32-NEXT: add sp, sp, #272
+; CHECK-i32-NEXT: ret
+;
+; CHECK-i64-LABEL: lrint_v16fp128:
+; CHECK-i64: // %bb.0:
+; CHECK-i64-NEXT: sub sp, sp, #272
+; CHECK-i64-NEXT: str q2, [sp, #160] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q2, [sp, #368]
+; CHECK-i64-NEXT: stp q0, q3, [sp] // 32-byte Folded Spill
+; CHECK-i64-NEXT: mov v0.16b, v1.16b
+; CHECK-i64-NEXT: str q2, [sp, #240] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q2, [sp, #384]
+; CHECK-i64-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill
+; CHECK-i64-NEXT: str q2, [sp, #224] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q2, [sp, #336]
+; CHECK-i64-NEXT: stp q5, q7, [sp, #32] // 32-byte Folded Spill
+; CHECK-i64-NEXT: str q2, [sp, #192] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q2, [sp, #352]
+; CHECK-i64-NEXT: str q2, [sp, #176] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q2, [sp, #304]
+; CHECK-i64-NEXT: str q2, [sp, #144] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q2, [sp, #320]
+; CHECK-i64-NEXT: stp q4, q2, [sp, #112] // 32-byte Folded Spill
+; CHECK-i64-NEXT: ldr q2, [sp, #272]
+; CHECK-i64-NEXT: stp q6, q2, [sp, #80] // 32-byte Folded Spill
+; CHECK-i64-NEXT: ldr q2, [sp, #288]
+; CHECK-i64-NEXT: str q2, [sp, #64] // 16-byte Folded Spill
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #208] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #144] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #176] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #176] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d7, x0
+; CHECK-i64-NEXT: ldp q0, q1, [sp, #208] // 32-byte Folded Reload
+; CHECK-i64-NEXT: ldp q4, q2, [sp, #96] // 32-byte Folded Reload
+; CHECK-i64-NEXT: ldr q3, [sp, #80] // 16-byte Folded Reload
+; CHECK-i64-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload
+; CHECK-i64-NEXT: ldr q6, [sp, #192] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v7.d[1], v1.d[0]
+; CHECK-i64-NEXT: ldp q5, q1, [sp, #144] // 32-byte Folded Reload
+; CHECK-i64-NEXT: add sp, sp, #272
+; CHECK-i64-NEXT: ret
+ %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128> %x)
+ ret <16 x iXLen> %a
+}
+declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>)
+
+define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) nounwind {
+; CHECK-i32-LABEL: lrint_v32fp128:
+; CHECK-i32: // %bb.0:
+; CHECK-i32-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-i32-NEXT: sub sp, sp, #512
+; CHECK-i32-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #896]
+; CHECK-i32-NEXT: stp q2, q3, [sp, #16] // 32-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #368] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #880]
+; CHECK-i32-NEXT: stp q7, q4, [sp, #464] // 32-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #352] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #864]
+; CHECK-i32-NEXT: str q6, [sp, #112] // 16-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #336] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #848]
+; CHECK-i32-NEXT: str q5, [sp, #80] // 16-byte Folded Spill
+; CHECK-i32-NEXT: str q1, [sp, #384] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #832]
+; CHECK-i32-NEXT: str q1, [sp, #320] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #816]
+; CHECK-i32-NEXT: str q1, [sp, #304] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #800]
+; CHECK-i32-NEXT: str q1, [sp, #288] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #784]
+; CHECK-i32-NEXT: str q1, [sp, #400] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #768]
+; CHECK-i32-NEXT: str q1, [sp, #272] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #752]
+; CHECK-i32-NEXT: str q1, [sp, #256] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #736]
+; CHECK-i32-NEXT: str q1, [sp, #240] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #720]
+; CHECK-i32-NEXT: str q1, [sp, #416] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #704]
+; CHECK-i32-NEXT: str q1, [sp, #224] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #688]
+; CHECK-i32-NEXT: str q1, [sp, #208] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #672]
+; CHECK-i32-NEXT: str q1, [sp, #192] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #656]
+; CHECK-i32-NEXT: str q1, [sp, #432] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #640]
+; CHECK-i32-NEXT: str q1, [sp, #176] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #624]
+; CHECK-i32-NEXT: str q1, [sp, #160] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #608]
+; CHECK-i32-NEXT: str q1, [sp, #144] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #592]
+; CHECK-i32-NEXT: str q1, [sp, #448] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #576]
+; CHECK-i32-NEXT: str q1, [sp, #128] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #560]
+; CHECK-i32-NEXT: str q1, [sp, #96] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #544]
+; CHECK-i32-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q1, [sp, #528]
+; CHECK-i32-NEXT: str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: str q0, [sp, #496] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[1], w0
+; CHECK-i32-NEXT: str q0, [sp, #496] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[2], w0
+; CHECK-i32-NEXT: str q0, [sp, #496] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[3], w0
+; CHECK-i32-NEXT: str q0, [sp, #496] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[1], w0
+; CHECK-i32-NEXT: str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[2], w0
+; CHECK-i32-NEXT: str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[3], w0
+; CHECK-i32-NEXT: str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[1], w0
+; CHECK-i32-NEXT: str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[2], w0
+; CHECK-i32-NEXT: str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[3], w0
+; CHECK-i32-NEXT: str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[1], w0
+; CHECK-i32-NEXT: str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[2], w0
+; CHECK-i32-NEXT: str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[3], w0
+; CHECK-i32-NEXT: str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: str q0, [sp, #432] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[1], w0
+; CHECK-i32-NEXT: str q0, [sp, #432] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[2], w0
+; CHECK-i32-NEXT: str q0, [sp, #432] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[3], w0
+; CHECK-i32-NEXT: str q0, [sp, #432] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[1], w0
+; CHECK-i32-NEXT: str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #256] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[2], w0
+; CHECK-i32-NEXT: str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[3], w0
+; CHECK-i32-NEXT: str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #288] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[1], w0
+; CHECK-i32-NEXT: str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #304] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[2], w0
+; CHECK-i32-NEXT: str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[3], w0
+; CHECK-i32-NEXT: str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: fmov s0, w0
+; CHECK-i32-NEXT: str q0, [sp, #384] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #336] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[1], w0
+; CHECK-i32-NEXT: str q0, [sp, #384] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload
+; CHECK-i32-NEXT: mov v0.s[2], w0
+; CHECK-i32-NEXT: str q0, [sp, #384] // 16-byte Folded Spill
+; CHECK-i32-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload
+; CHECK-i32-NEXT: bl lrintl
+; CHECK-i32-NEXT: ldp q7, q6, [sp, #384] // 32-byte Folded Reload
+; CHECK-i32-NEXT: ldp q1, q0, [sp, #480] // 32-byte Folded Reload
+; CHECK-i32-NEXT: ldp q3, q2, [sp, #448] // 32-byte Folded Reload
+; CHECK-i32-NEXT: ldp q5, q4, [sp, #416] // 32-byte Folded Reload
+; CHECK-i32-NEXT: mov v7.s[3], w0
+; CHECK-i32-NEXT: add sp, sp, #512
+; CHECK-i32-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-i32-NEXT: ret
+;
+; CHECK-i64-LABEL: lrint_v32fp128:
+; CHECK-i64: // %bb.0:
+; CHECK-i64-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-i64-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT: sub sp, sp, #512
+; CHECK-i64-NEXT: str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #896]
+; CHECK-i64-NEXT: mov x19, x8
+; CHECK-i64-NEXT: str q7, [sp, #272] // 16-byte Folded Spill
+; CHECK-i64-NEXT: str q0, [sp, #496] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #912]
+; CHECK-i64-NEXT: str q6, [sp, #320] // 16-byte Folded Spill
+; CHECK-i64-NEXT: str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #864]
+; CHECK-i64-NEXT: stp q3, q5, [sp, #16] // 32-byte Folded Spill
+; CHECK-i64-NEXT: str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #880]
+; CHECK-i64-NEXT: stp q2, q0, [sp, #416] // 32-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #832]
+; CHECK-i64-NEXT: str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #848]
+; CHECK-i64-NEXT: stp q4, q0, [sp, #368] // 32-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #800]
+; CHECK-i64-NEXT: str q0, [sp, #352] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #816]
+; CHECK-i64-NEXT: str q0, [sp, #336] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #768]
+; CHECK-i64-NEXT: str q0, [sp, #304] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #784]
+; CHECK-i64-NEXT: str q0, [sp, #288] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #736]
+; CHECK-i64-NEXT: str q0, [sp, #256] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #752]
+; CHECK-i64-NEXT: str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #704]
+; CHECK-i64-NEXT: str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #720]
+; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #672]
+; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #688]
+; CHECK-i64-NEXT: str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #640]
+; CHECK-i64-NEXT: str q0, [sp, #144] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #656]
+; CHECK-i64-NEXT: str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #608]
+; CHECK-i64-NEXT: str q0, [sp, #176] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #624]
+; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #576]
+; CHECK-i64-NEXT: str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #592]
+; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #544]
+; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #560]
+; CHECK-i64-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i64-NEXT: mov v0.16b, v1.16b
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #368] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #272] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #272] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #320] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #272] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #272] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #272] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #176] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #112] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #144] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #256] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #240] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #256] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #288] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #288] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #304] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #288] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #304] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #336] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #336] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #336] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #352] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #384] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #384] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #432] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #432] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-i64-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload
+; CHECK-i64-NEXT: bl lrintl
+; CHECK-i64-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-i64-NEXT: str q0, [x19, #208]
+; CHECK-i64-NEXT: fmov d0, x0
+; CHECK-i64-NEXT: ldr q1, [sp, #480] // 16-byte Folded Reload
+; CHECK-i64-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT: ldr q1, [sp, #352] // 16-byte Folded Reload
+; CHECK-i64-NEXT: str q1, [x19, #192]
+; CHECK-i64-NEXT: ldr q1, [sp, #304] // 16-byte Folded Reload
+; CHECK-i64-NEXT: str q1, [x19, #176]
+; CHECK-i64-NEXT: ldr q1, [sp, #256] // 16-byte Folded Reload
+; CHECK-i64-NEXT: str q0, [x19, #240]
+; CHECK-i64-NEXT: str q1, [x19, #160]
+; CHECK-i64-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-i64-NEXT: str q0, [x19, #224]
+; CHECK-i64-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i64-NEXT: str q0, [x19, #144]
+; CHECK-i64-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-i64-NEXT: str q0, [x19, #128]
+; CHECK-i64-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-i64-NEXT: str q0, [x19, #112]
+; CHECK-i64-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-i64-NEXT: str q0, [x19, #96]
+; CHECK-i64-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i64-NEXT: str q0, [x19, #80]
+; CHECK-i64-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload
+; CHECK-i64-NEXT: str q0, [x19, #64]
+; CHECK-i64-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload
+; CHECK-i64-NEXT: str q0, [x19, #48]
+; CHECK-i64-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload
+; CHECK-i64-NEXT: str q0, [x19, #32]
+; CHECK-i64-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-i64-NEXT: str q0, [x19, #16]
+; CHECK-i64-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-i64-NEXT: str q0, [x19]
+; CHECK-i64-NEXT: add sp, sp, #512
+; CHECK-i64-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload
+; CHECK-i64-NEXT: ret
+ %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32fp128(<32 x fp128> %x)
+ ret <32 x iXLen> %a
+}
+declare <32 x iXLen> @llvm.lrint.v32iXLen.v32fp128(<32 x fp128>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-i32-GI: {{.*}}
diff --git a/llvm/test/CodeGen/ARM/llrint-conv.ll b/llvm/test/CodeGen/ARM/llrint-conv.ll
index 017955bb43afb..f0fb2e7543be6 100644
--- a/llvm/test/CodeGen/ARM/llrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/llrint-conv.ll
@@ -1,6 +1,16 @@
; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP
; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP
+; SOFTFP-LABEL: testmsxh_builtin:
+; SOFTFP: bl llrintf
+; HARDFP-LABEL: testmsxh_builtin:
+; HARDFP: bl llrintf
+define i64 @testmsxh_builtin(half %x) {
+entry:
+ %0 = tail call i64 @llvm.llrint.f16(half %x)
+ ret i64 %0
+}
+
; SOFTFP-LABEL: testmsxs_builtin:
; SOFTFP: bl llrintf
; HARDFP-LABEL: testmsxs_builtin:
@@ -21,5 +31,16 @@ entry:
ret i64 %0
}
+; FIXME(#44744): incorrect libcall
+; SOFTFP-LABEL: testmsxq_builtin:
+; SOFTFP: bl llrintl
+; HARDFP-LABEL: testmsxq_builtin:
+; HARDFP: bl llrintl
+define i64 @testmsxq_builtin(fp128 %x) {
+entry:
+ %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
+ ret i64 %0
+}
+
declare i64 @llvm.llrint.f32(float) nounwind readnone
declare i64 @llvm.llrint.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll
index 192da565c12fd..9aa95112af533 100644
--- a/llvm/test/CodeGen/ARM/lrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/lrint-conv.ll
@@ -1,6 +1,13 @@
; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP
; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP
+; FIXME: crash
+; define i32 @testmswh_builtin(half %x) {
+; entry:
+; %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+; ret i32 %0
+; }
+
; SOFTFP-LABEL: testmsws_builtin:
; SOFTFP: bl lrintf
; HARDFP-LABEL: testmsws_builtin:
@@ -21,5 +28,16 @@ entry:
ret i32 %0
}
+; FIXME(#44744): incorrect libcall
+; SOFTFP-LABEL: testmswq_builtin:
+; SOFTFP: bl lrintl
+; HARDFP-LABEL: testmswq_builtin:
+; HARDFP: bl lrintl
+define i32 @testmswq_builtin(fp128 %x) {
+entry:
+ %0 = tail call i32 @llvm.lrint.i32.f128(fp128 %x)
+ ret i32 %0
+}
+
declare i32 @llvm.lrint.i32.f32(float) nounwind readnone
declare i32 @llvm.lrint.i32.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/ARM/vector-llrint.ll b/llvm/test/CodeGen/ARM/vector-llrint.ll
new file mode 100644
index 0000000000000..870947fac063e
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/vector-llrint.ll
@@ -0,0 +1,11126 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf | FileCheck %s --check-prefix=LE
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf | FileCheck %s --check-prefix=LE
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=LE-NEON
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=LE-NEON
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf | FileCheck %s --check-prefix=BE
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf | FileCheck %s --check-prefix=BE
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-NEON
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-NEON
+
+define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
+; LE-LABEL: llrint_v1i64_v1f16:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r11, lr}
+; LE-NEXT: push {r11, lr}
+; LE-NEXT: vmov r0, s0
+; LE-NEXT: bl __aeabi_f2h
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d0[0], r0
+; LE-NEXT: vmov.32 d0[1], r1
+; LE-NEXT: pop {r11, pc}
+;
+; LE-NEON-LABEL: llrint_v1i64_v1f16:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r11, lr}
+; LE-NEON-NEXT: push {r11, lr}
+; LE-NEON-NEXT: vmov r0, s0
+; LE-NEON-NEXT: bl __aeabi_f2h
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d0[0], r0
+; LE-NEON-NEXT: vmov.32 d0[1], r1
+; LE-NEON-NEXT: pop {r11, pc}
+;
+; BE-LABEL: llrint_v1i64_v1f16:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r11, lr}
+; BE-NEXT: push {r11, lr}
+; BE-NEXT: vmov r0, s0
+; BE-NEXT: bl __aeabi_f2h
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: vmov.32 d16[1], r1
+; BE-NEXT: vrev64.32 d0, d16
+; BE-NEXT: pop {r11, pc}
+;
+; BE-NEON-LABEL: llrint_v1i64_v1f16:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r11, lr}
+; BE-NEON-NEXT: push {r11, lr}
+; BE-NEON-NEXT: vmov r0, s0
+; BE-NEON-NEXT: bl __aeabi_f2h
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: vmov.32 d16[1], r1
+; BE-NEON-NEXT: vrev64.32 d0, d16
+; BE-NEON-NEXT: pop {r11, pc}
+ %a = call <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half> %x)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>)
+
+define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
+; LE-LABEL: llrint_v1i64_v2f16:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r4, r5, r11, lr}
+; LE-NEXT: push {r4, r5, r11, lr}
+; LE-NEXT: .vsave {d8, d9}
+; LE-NEXT: vpush {d8, d9}
+; LE-NEXT: vmov r0, s1
+; LE-NEXT: vmov.f32 s16, s0
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: mov r4, r0
+; LE-NEXT: vmov r0, s16
+; LE-NEXT: mov r5, r1
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: vmov.32 d9[0], r4
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: vmov.32 d9[1], r5
+; LE-NEXT: vmov.32 d8[1], r1
+; LE-NEXT: vorr q0, q4, q4
+; LE-NEXT: vpop {d8, d9}
+; LE-NEXT: pop {r4, r5, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v1i64_v2f16:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r4, r5, r11, lr}
+; LE-NEON-NEXT: push {r4, r5, r11, lr}
+; LE-NEON-NEXT: .vsave {d8, d9}
+; LE-NEON-NEXT: vpush {d8, d9}
+; LE-NEON-NEXT: vmov r0, s1
+; LE-NEON-NEXT: vmov.f32 s16, s0
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: mov r4, r0
+; LE-NEON-NEXT: vmov r0, s16
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: vmov.32 d9[0], r4
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: vmov.32 d9[1], r5
+; LE-NEON-NEXT: vmov.32 d8[1], r1
+; LE-NEON-NEXT: vorr q0, q4, q4
+; LE-NEON-NEXT: vpop {d8, d9}
+; LE-NEON-NEXT: pop {r4, r5, r11, pc}
+;
+; BE-LABEL: llrint_v1i64_v2f16:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r4, r5, r11, lr}
+; BE-NEXT: push {r4, r5, r11, lr}
+; BE-NEXT: .vsave {d8}
+; BE-NEXT: vpush {d8}
+; BE-NEXT: vmov r0, s1
+; BE-NEXT: vmov.f32 s16, s0
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: mov r4, r0
+; BE-NEXT: vmov r0, s16
+; BE-NEXT: mov r5, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: vmov.32 d8[0], r4
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: vmov.32 d8[1], r5
+; BE-NEXT: vmov.32 d16[1], r1
+; BE-NEXT: vrev64.32 d1, d8
+; BE-NEXT: vrev64.32 d0, d16
+; BE-NEXT: vpop {d8}
+; BE-NEXT: pop {r4, r5, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v1i64_v2f16:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r4, r5, r11, lr}
+; BE-NEON-NEXT: push {r4, r5, r11, lr}
+; BE-NEON-NEXT: .vsave {d8}
+; BE-NEON-NEXT: vpush {d8}
+; BE-NEON-NEXT: vmov r0, s1
+; BE-NEON-NEXT: vmov.f32 s16, s0
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: mov r4, r0
+; BE-NEON-NEXT: vmov r0, s16
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: vmov.32 d8[0], r4
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: vmov.32 d8[1], r5
+; BE-NEON-NEXT: vmov.32 d16[1], r1
+; BE-NEON-NEXT: vrev64.32 d1, d8
+; BE-NEON-NEXT: vrev64.32 d0, d16
+; BE-NEON-NEXT: vpop {d8}
+; BE-NEON-NEXT: pop {r4, r5, r11, pc}
+ %a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>)
+
+define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
+; LE-LABEL: llrint_v4i64_v4f16:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; LE-NEXT: push {r4, r5, r6, r7, r11, lr}
+; LE-NEXT: .vsave {d12, d13}
+; LE-NEXT: vpush {d12, d13}
+; LE-NEXT: .vsave {d8, d9, d10}
+; LE-NEXT: vpush {d8, d9, d10}
+; LE-NEXT: vmov r0, s1
+; LE-NEXT: vmov.f32 s16, s3
+; LE-NEXT: vmov.f32 s20, s2
+; LE-NEXT: vmov.f32 s18, s0
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: mov r5, r0
+; LE-NEXT: vmov r0, s18
+; LE-NEXT: mov r4, r1
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: mov r7, r0
+; LE-NEXT: vmov r0, s16
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r7
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: vmov r0, s20
+; LE-NEXT: mov r7, r1
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: vmov.32 d13[0], r5
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: vmov.32 d13[1], r4
+; LE-NEXT: vmov.32 d9[1], r6
+; LE-NEXT: vmov.32 d12[1], r7
+; LE-NEXT: vmov.32 d8[1], r1
+; LE-NEXT: vorr q0, q6, q6
+; LE-NEXT: vorr q1, q4, q4
+; LE-NEXT: vpop {d8, d9, d10}
+; LE-NEXT: vpop {d12, d13}
+; LE-NEXT: pop {r4, r5, r6, r7, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v4i64_v4f16:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; LE-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
+; LE-NEON-NEXT: .vsave {d12, d13}
+; LE-NEON-NEXT: vpush {d12, d13}
+; LE-NEON-NEXT: .vsave {d8, d9, d10}
+; LE-NEON-NEXT: vpush {d8, d9, d10}
+; LE-NEON-NEXT: vmov r0, s1
+; LE-NEON-NEXT: vmov.f32 s16, s3
+; LE-NEON-NEXT: vmov.f32 s20, s2
+; LE-NEON-NEXT: vmov.f32 s18, s0
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: mov r5, r0
+; LE-NEON-NEXT: vmov r0, s18
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: mov r7, r0
+; LE-NEON-NEXT: vmov r0, s16
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r7
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: vmov r0, s20
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: vmov.32 d13[0], r5
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: vmov.32 d13[1], r4
+; LE-NEON-NEXT: vmov.32 d9[1], r6
+; LE-NEON-NEXT: vmov.32 d12[1], r7
+; LE-NEON-NEXT: vmov.32 d8[1], r1
+; LE-NEON-NEXT: vorr q0, q6, q6
+; LE-NEON-NEXT: vorr q1, q4, q4
+; LE-NEON-NEXT: vpop {d8, d9, d10}
+; LE-NEON-NEXT: vpop {d12, d13}
+; LE-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
+;
+; BE-LABEL: llrint_v4i64_v4f16:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; BE-NEXT: push {r4, r5, r6, r7, r11, lr}
+; BE-NEXT: .vsave {d8, d9, d10}
+; BE-NEXT: vpush {d8, d9, d10}
+; BE-NEXT: vmov r0, s1
+; BE-NEXT: vmov.f32 s16, s3
+; BE-NEXT: vmov.f32 s18, s2
+; BE-NEXT: vmov.f32 s20, s0
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: mov r5, r0
+; BE-NEXT: vmov r0, s20
+; BE-NEXT: mov r4, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: mov r7, r0
+; BE-NEXT: vmov r0, s16
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov s0, r7
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vmov.32 d8[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: vmov r0, s18
+; BE-NEXT: mov r7, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: vmov.32 d9[0], r5
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: vmov.32 d9[1], r4
+; BE-NEXT: vmov.32 d8[1], r6
+; BE-NEXT: vmov.32 d10[1], r7
+; BE-NEXT: vmov.32 d16[1], r1
+; BE-NEXT: vrev64.32 d1, d9
+; BE-NEXT: vrev64.32 d3, d8
+; BE-NEXT: vrev64.32 d0, d10
+; BE-NEXT: vrev64.32 d2, d16
+; BE-NEXT: vpop {d8, d9, d10}
+; BE-NEXT: pop {r4, r5, r6, r7, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v4i64_v4f16:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; BE-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
+; BE-NEON-NEXT: .vsave {d8, d9, d10}
+; BE-NEON-NEXT: vpush {d8, d9, d10}
+; BE-NEON-NEXT: vmov r0, s1
+; BE-NEON-NEXT: vmov.f32 s16, s3
+; BE-NEON-NEXT: vmov.f32 s18, s2
+; BE-NEON-NEXT: vmov.f32 s20, s0
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: mov r5, r0
+; BE-NEON-NEXT: vmov r0, s20
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: mov r7, r0
+; BE-NEON-NEXT: vmov r0, s16
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov s0, r7
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vmov.32 d8[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: vmov r0, s18
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: vmov.32 d9[0], r5
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: vmov.32 d9[1], r4
+; BE-NEON-NEXT: vmov.32 d8[1], r6
+; BE-NEON-NEXT: vmov.32 d10[1], r7
+; BE-NEON-NEXT: vmov.32 d16[1], r1
+; BE-NEON-NEXT: vrev64.32 d1, d9
+; BE-NEON-NEXT: vrev64.32 d3, d8
+; BE-NEON-NEXT: vrev64.32 d0, d10
+; BE-NEON-NEXT: vrev64.32 d2, d16
+; BE-NEON-NEXT: vpop {d8, d9, d10}
+; BE-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
+ %a = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> %x)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>)
+
+define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
+; LE-LABEL: llrint_v8i64_v8f16:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: .pad #4
+; LE-NEXT: sub sp, sp, #4
+; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: .pad #8
+; LE-NEXT: sub sp, sp, #8
+; LE-NEXT: vmov r0, s1
+; LE-NEXT: vstr s6, [sp, #4] @ 4-byte Spill
+; LE-NEXT: vmov.f32 s16, s7
+; LE-NEXT: vmov.f32 s18, s5
+; LE-NEXT: vmov.f32 s20, s4
+; LE-NEXT: vmov.f32 s22, s3
+; LE-NEXT: vmov.f32 s24, s2
+; LE-NEXT: vmov.f32 s26, s0
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: mov r9, r0
+; LE-NEXT: vmov r0, s26
+; LE-NEXT: str r1, [sp] @ 4-byte Spill
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: mov r10, r0
+; LE-NEXT: vmov r0, s22
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: mov r5, r0
+; LE-NEXT: vmov r0, s24
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: mov r7, r0
+; LE-NEXT: vmov r0, s18
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: mov r6, r0
+; LE-NEXT: vmov r0, s20
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: mov r4, r0
+; LE-NEXT: vmov r0, s16
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r4
+; LE-NEXT: mov r11, r1
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r6
+; LE-NEXT: mov r8, r1
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r7
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r5
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r10
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: vmov r0, s0
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: vmov.32 d9[0], r9
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: ldr r0, [sp] @ 4-byte Reload
+; LE-NEXT: vmov.32 d15[1], r5
+; LE-NEXT: vmov.32 d9[1], r0
+; LE-NEXT: vmov.32 d13[1], r6
+; LE-NEXT: vmov.32 d11[1], r11
+; LE-NEXT: vmov.32 d8[1], r4
+; LE-NEXT: vmov.32 d14[1], r7
+; LE-NEXT: vorr q0, q4, q4
+; LE-NEXT: vmov.32 d12[1], r8
+; LE-NEXT: vorr q1, q7, q7
+; LE-NEXT: vmov.32 d10[1], r1
+; LE-NEXT: vorr q2, q6, q6
+; LE-NEXT: vorr q3, q5, q5
+; LE-NEXT: add sp, sp, #8
+; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: add sp, sp, #4
+; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v8i64_v8f16:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: .pad #4
+; LE-NEON-NEXT: sub sp, sp, #4
+; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: .pad #8
+; LE-NEON-NEXT: sub sp, sp, #8
+; LE-NEON-NEXT: vmov r0, s1
+; LE-NEON-NEXT: vstr s6, [sp, #4] @ 4-byte Spill
+; LE-NEON-NEXT: vmov.f32 s16, s7
+; LE-NEON-NEXT: vmov.f32 s18, s5
+; LE-NEON-NEXT: vmov.f32 s20, s4
+; LE-NEON-NEXT: vmov.f32 s22, s3
+; LE-NEON-NEXT: vmov.f32 s24, s2
+; LE-NEON-NEXT: vmov.f32 s26, s0
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: mov r9, r0
+; LE-NEON-NEXT: vmov r0, s26
+; LE-NEON-NEXT: str r1, [sp] @ 4-byte Spill
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: mov r10, r0
+; LE-NEON-NEXT: vmov r0, s22
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: mov r5, r0
+; LE-NEON-NEXT: vmov r0, s24
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: mov r7, r0
+; LE-NEON-NEXT: vmov r0, s18
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: mov r6, r0
+; LE-NEON-NEXT: vmov r0, s20
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: mov r4, r0
+; LE-NEON-NEXT: vmov r0, s16
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r4
+; LE-NEON-NEXT: mov r11, r1
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r6
+; LE-NEON-NEXT: mov r8, r1
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r7
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r5
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r10
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: vmov r0, s0
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: vmov.32 d9[0], r9
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: ldr r0, [sp] @ 4-byte Reload
+; LE-NEON-NEXT: vmov.32 d15[1], r5
+; LE-NEON-NEXT: vmov.32 d9[1], r0
+; LE-NEON-NEXT: vmov.32 d13[1], r6
+; LE-NEON-NEXT: vmov.32 d11[1], r11
+; LE-NEON-NEXT: vmov.32 d8[1], r4
+; LE-NEON-NEXT: vmov.32 d14[1], r7
+; LE-NEON-NEXT: vorr q0, q4, q4
+; LE-NEON-NEXT: vmov.32 d12[1], r8
+; LE-NEON-NEXT: vorr q1, q7, q7
+; LE-NEON-NEXT: vmov.32 d10[1], r1
+; LE-NEON-NEXT: vorr q2, q6, q6
+; LE-NEON-NEXT: vorr q3, q5, q5
+; LE-NEON-NEXT: add sp, sp, #8
+; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: add sp, sp, #4
+; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v8i64_v8f16:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: .pad #4
+; BE-NEXT: sub sp, sp, #4
+; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEXT: .pad #8
+; BE-NEXT: sub sp, sp, #8
+; BE-NEXT: vmov r0, s1
+; BE-NEXT: vmov.f32 s18, s7
+; BE-NEXT: vmov.f32 s16, s6
+; BE-NEXT: vmov.f32 s20, s5
+; BE-NEXT: vmov.f32 s22, s4
+; BE-NEXT: vmov.f32 s24, s3
+; BE-NEXT: vmov.f32 s26, s2
+; BE-NEXT: vmov.f32 s28, s0
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: mov r9, r0
+; BE-NEXT: vmov r0, s28
+; BE-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: mov r10, r0
+; BE-NEXT: vmov r0, s24
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: mov r5, r0
+; BE-NEXT: vmov r0, s26
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: mov r7, r0
+; BE-NEXT: vmov r0, s20
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: mov r6, r0
+; BE-NEXT: vmov r0, s22
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: mov r4, r0
+; BE-NEXT: vmov r0, s18
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov s0, r4
+; BE-NEXT: mov r11, r1
+; BE-NEXT: vmov.32 d9[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov s0, r6
+; BE-NEXT: mov r8, r1
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov s0, r7
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov s0, r5
+; BE-NEXT: mov r7, r1
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov s0, r10
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: vmov r0, s16
+; BE-NEXT: mov r4, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: vmov.32 d8[0], r9
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; BE-NEXT: vmov.32 d13[1], r5
+; BE-NEXT: vmov.32 d8[1], r0
+; BE-NEXT: vmov.32 d11[1], r6
+; BE-NEXT: vmov.32 d9[1], r11
+; BE-NEXT: vmov.32 d14[1], r4
+; BE-NEXT: vmov.32 d12[1], r7
+; BE-NEXT: vmov.32 d10[1], r8
+; BE-NEXT: vmov.32 d16[1], r1
+; BE-NEXT: vrev64.32 d1, d8
+; BE-NEXT: vrev64.32 d3, d13
+; BE-NEXT: vrev64.32 d5, d11
+; BE-NEXT: vrev64.32 d7, d9
+; BE-NEXT: vrev64.32 d0, d14
+; BE-NEXT: vrev64.32 d2, d12
+; BE-NEXT: vrev64.32 d4, d10
+; BE-NEXT: vrev64.32 d6, d16
+; BE-NEXT: add sp, sp, #8
+; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEXT: add sp, sp, #4
+; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v8i64_v8f16:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: .pad #4
+; BE-NEON-NEXT: sub sp, sp, #4
+; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEON-NEXT: .pad #8
+; BE-NEON-NEXT: sub sp, sp, #8
+; BE-NEON-NEXT: vmov r0, s1
+; BE-NEON-NEXT: vmov.f32 s18, s7
+; BE-NEON-NEXT: vmov.f32 s16, s6
+; BE-NEON-NEXT: vmov.f32 s20, s5
+; BE-NEON-NEXT: vmov.f32 s22, s4
+; BE-NEON-NEXT: vmov.f32 s24, s3
+; BE-NEON-NEXT: vmov.f32 s26, s2
+; BE-NEON-NEXT: vmov.f32 s28, s0
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: mov r9, r0
+; BE-NEON-NEXT: vmov r0, s28
+; BE-NEON-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: mov r10, r0
+; BE-NEON-NEXT: vmov r0, s24
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: mov r5, r0
+; BE-NEON-NEXT: vmov r0, s26
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: mov r7, r0
+; BE-NEON-NEXT: vmov r0, s20
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: mov r6, r0
+; BE-NEON-NEXT: vmov r0, s22
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: mov r4, r0
+; BE-NEON-NEXT: vmov r0, s18
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov s0, r4
+; BE-NEON-NEXT: mov r11, r1
+; BE-NEON-NEXT: vmov.32 d9[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov s0, r6
+; BE-NEON-NEXT: mov r8, r1
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov s0, r7
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov s0, r5
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov s0, r10
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: vmov r0, s16
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: vmov.32 d8[0], r9
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; BE-NEON-NEXT: vmov.32 d13[1], r5
+; BE-NEON-NEXT: vmov.32 d8[1], r0
+; BE-NEON-NEXT: vmov.32 d11[1], r6
+; BE-NEON-NEXT: vmov.32 d9[1], r11
+; BE-NEON-NEXT: vmov.32 d14[1], r4
+; BE-NEON-NEXT: vmov.32 d12[1], r7
+; BE-NEON-NEXT: vmov.32 d10[1], r8
+; BE-NEON-NEXT: vmov.32 d16[1], r1
+; BE-NEON-NEXT: vrev64.32 d1, d8
+; BE-NEON-NEXT: vrev64.32 d3, d13
+; BE-NEON-NEXT: vrev64.32 d5, d11
+; BE-NEON-NEXT: vrev64.32 d7, d9
+; BE-NEON-NEXT: vrev64.32 d0, d14
+; BE-NEON-NEXT: vrev64.32 d2, d12
+; BE-NEON-NEXT: vrev64.32 d4, d10
+; BE-NEON-NEXT: vrev64.32 d6, d16
+; BE-NEON-NEXT: add sp, sp, #8
+; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEON-NEXT: add sp, sp, #4
+; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>)
+
+define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
+; LE-LABEL: llrint_v16i64_v16f16:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: .pad #4
+; LE-NEXT: sub sp, sp, #4
+; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: .pad #120
+; LE-NEXT: sub sp, sp, #120
+; LE-NEXT: mov r11, r0
+; LE-NEXT: vmov r0, s7
+; LE-NEXT: vstr s15, [sp, #24] @ 4-byte Spill
+; LE-NEXT: vmov.f32 s23, s13
+; LE-NEXT: vstr s14, [sp, #100] @ 4-byte Spill
+; LE-NEXT: vmov.f32 s25, s12
+; LE-NEXT: vmov.f32 s27, s11
+; LE-NEXT: vstr s10, [sp, #104] @ 4-byte Spill
+; LE-NEXT: vstr s9, [sp, #108] @ 4-byte Spill
+; LE-NEXT: vmov.f32 s24, s8
+; LE-NEXT: vmov.f32 s19, s6
+; LE-NEXT: vmov.f32 s29, s5
+; LE-NEXT: vmov.f32 s17, s4
+; LE-NEXT: vmov.f32 s16, s3
+; LE-NEXT: vmov.f32 s21, s2
+; LE-NEXT: vmov.f32 s26, s1
+; LE-NEXT: vmov.f32 s18, s0
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: mov r7, r0
+; LE-NEXT: vmov r0, s25
+; LE-NEXT: str r1, [sp, #56] @ 4-byte Spill
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: mov r5, r0
+; LE-NEXT: vmov r0, s27
+; LE-NEXT: str r1, [sp, #116] @ 4-byte Spill
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: mov r6, r0
+; LE-NEXT: vmov r0, s29
+; LE-NEXT: str r1, [sp, #112] @ 4-byte Spill
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: vmov r0, s23
+; LE-NEXT: mov r4, r1
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: add lr, sp, #80
+; LE-NEXT: vmov.32 d17[0], r6
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: mov r6, r0
+; LE-NEXT: vmov r0, s17
+; LE-NEXT: vmov r8, s21
+; LE-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; LE-NEXT: vmov r10, s19
+; LE-NEXT: vmov.32 d10[0], r5
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: add lr, sp, #40
+; LE-NEXT: vmov.32 d11[0], r6
+; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: mov r0, r10
+; LE-NEXT: mov r9, r1
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: vmov.32 d11[0], r7
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: mov r0, r8
+; LE-NEXT: mov r7, r1
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: mov r6, r0
+; LE-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; LE-NEXT: vmov.32 d11[1], r0
+; LE-NEXT: vmov r0, s18
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: mov r5, r0
+; LE-NEXT: vmov r0, s16
+; LE-NEXT: vmov.32 d10[1], r7
+; LE-NEXT: add lr, sp, #56
+; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: vmov.32 d15[1], r4
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: vmov r0, s26
+; LE-NEXT: add lr, sp, #24
+; LE-NEXT: vmov r8, s24
+; LE-NEXT: vmov.32 d14[1], r9
+; LE-NEXT: mov r10, r1
+; LE-NEXT: vmov s24, r5
+; LE-NEXT: vldr s0, [sp, #24] @ 4-byte Reload
+; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT: vmov r7, s0
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov.f32 s0, s24
+; LE-NEXT: vmov s22, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s22
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: vmov s24, r6
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: mov r0, r7
+; LE-NEXT: mov r6, r1
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov.f32 s0, s24
+; LE-NEXT: vmov s22, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s22
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: add lr, sp, #8
+; LE-NEXT: mov r9, r1
+; LE-NEXT: vmov.32 d15[1], r6
+; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: mov r0, r8
+; LE-NEXT: mov r6, r1
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vldr s0, [sp, #100] @ 4-byte Reload
+; LE-NEXT: mov r7, r0
+; LE-NEXT: vmov.32 d14[1], r5
+; LE-NEXT: vmov r0, s0
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; LE-NEXT: vmov s20, r0
+; LE-NEXT: vmov.32 d13[1], r6
+; LE-NEXT: vmov r4, s0
+; LE-NEXT: vldr s0, [sp, #108] @ 4-byte Reload
+; LE-NEXT: vmov r0, s0
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov.f32 s0, s20
+; LE-NEXT: vmov s16, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s16
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: vmov s18, r7
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: mov r0, r4
+; LE-NEXT: mov r6, r1
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov.f32 s0, s18
+; LE-NEXT: vmov s16, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s16
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d11[1], r6
+; LE-NEXT: bl llrintf
+; LE-NEXT: add lr, sp, #80
+; LE-NEXT: vmov.32 d10[1], r4
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #40
+; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #8
+; LE-NEXT: vmov.32 d16[0], r0
+; LE-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; LE-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #24
+; LE-NEXT: vmov.32 d19[1], r0
+; LE-NEXT: ldr r0, [sp, #116] @ 4-byte Reload
+; LE-NEXT: vmov.32 d21[1], r10
+; LE-NEXT: vmov.32 d18[1], r0
+; LE-NEXT: ldr r0, [sp, #112] @ 4-byte Reload
+; LE-NEXT: vmov.32 d12[1], r5
+; LE-NEXT: vmov.32 d17[1], r0
+; LE-NEXT: add r0, r11, #64
+; LE-NEXT: vmov.32 d16[1], r1
+; LE-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-NEXT: vmov.32 d20[1], r9
+; LE-NEXT: vst1.64 {d12, d13}, [r0:128]
+; LE-NEXT: vst1.64 {d14, d15}, [r11:128]!
+; LE-NEXT: vst1.64 {d20, d21}, [r11:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #56
+; LE-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vst1.64 {d16, d17}, [r11:128]
+; LE-NEXT: add sp, sp, #120
+; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: add sp, sp, #4
+; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v16i64_v16f16:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: .pad #4
+; LE-NEON-NEXT: sub sp, sp, #4
+; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: .pad #120
+; LE-NEON-NEXT: sub sp, sp, #120
+; LE-NEON-NEXT: mov r11, r0
+; LE-NEON-NEXT: vmov r0, s7
+; LE-NEON-NEXT: vstr s15, [sp, #24] @ 4-byte Spill
+; LE-NEON-NEXT: vmov.f32 s23, s13
+; LE-NEON-NEXT: vstr s14, [sp, #100] @ 4-byte Spill
+; LE-NEON-NEXT: vmov.f32 s25, s12
+; LE-NEON-NEXT: vmov.f32 s27, s11
+; LE-NEON-NEXT: vstr s10, [sp, #104] @ 4-byte Spill
+; LE-NEON-NEXT: vstr s9, [sp, #108] @ 4-byte Spill
+; LE-NEON-NEXT: vmov.f32 s24, s8
+; LE-NEON-NEXT: vmov.f32 s19, s6
+; LE-NEON-NEXT: vmov.f32 s29, s5
+; LE-NEON-NEXT: vmov.f32 s17, s4
+; LE-NEON-NEXT: vmov.f32 s16, s3
+; LE-NEON-NEXT: vmov.f32 s21, s2
+; LE-NEON-NEXT: vmov.f32 s26, s1
+; LE-NEON-NEXT: vmov.f32 s18, s0
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: mov r7, r0
+; LE-NEON-NEXT: vmov r0, s25
+; LE-NEON-NEXT: str r1, [sp, #56] @ 4-byte Spill
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: mov r5, r0
+; LE-NEON-NEXT: vmov r0, s27
+; LE-NEON-NEXT: str r1, [sp, #116] @ 4-byte Spill
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: mov r6, r0
+; LE-NEON-NEXT: vmov r0, s29
+; LE-NEON-NEXT: str r1, [sp, #112] @ 4-byte Spill
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: vmov r0, s23
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: add lr, sp, #80
+; LE-NEON-NEXT: vmov.32 d17[0], r6
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: mov r6, r0
+; LE-NEON-NEXT: vmov r0, s17
+; LE-NEON-NEXT: vmov r8, s21
+; LE-NEON-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; LE-NEON-NEXT: vmov r10, s19
+; LE-NEON-NEXT: vmov.32 d10[0], r5
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: add lr, sp, #40
+; LE-NEON-NEXT: vmov.32 d11[0], r6
+; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: mov r0, r10
+; LE-NEON-NEXT: mov r9, r1
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: vmov.32 d11[0], r7
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: mov r0, r8
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: mov r6, r0
+; LE-NEON-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; LE-NEON-NEXT: vmov.32 d11[1], r0
+; LE-NEON-NEXT: vmov r0, s18
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: mov r5, r0
+; LE-NEON-NEXT: vmov r0, s16
+; LE-NEON-NEXT: vmov.32 d10[1], r7
+; LE-NEON-NEXT: add lr, sp, #56
+; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: vmov.32 d15[1], r4
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: vmov r0, s26
+; LE-NEON-NEXT: add lr, sp, #24
+; LE-NEON-NEXT: vmov r8, s24
+; LE-NEON-NEXT: vmov.32 d14[1], r9
+; LE-NEON-NEXT: mov r10, r1
+; LE-NEON-NEXT: vmov s24, r5
+; LE-NEON-NEXT: vldr s0, [sp, #24] @ 4-byte Reload
+; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT: vmov r7, s0
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov.f32 s0, s24
+; LE-NEON-NEXT: vmov s22, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s22
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: vmov s24, r6
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: mov r0, r7
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov.f32 s0, s24
+; LE-NEON-NEXT: vmov s22, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s22
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: add lr, sp, #8
+; LE-NEON-NEXT: mov r9, r1
+; LE-NEON-NEXT: vmov.32 d15[1], r6
+; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: mov r0, r8
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vldr s0, [sp, #100] @ 4-byte Reload
+; LE-NEON-NEXT: mov r7, r0
+; LE-NEON-NEXT: vmov.32 d14[1], r5
+; LE-NEON-NEXT: vmov r0, s0
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; LE-NEON-NEXT: vmov s20, r0
+; LE-NEON-NEXT: vmov.32 d13[1], r6
+; LE-NEON-NEXT: vmov r4, s0
+; LE-NEON-NEXT: vldr s0, [sp, #108] @ 4-byte Reload
+; LE-NEON-NEXT: vmov r0, s0
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov.f32 s0, s20
+; LE-NEON-NEXT: vmov s16, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s16
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: vmov s18, r7
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: mov r0, r4
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov.f32 s0, s18
+; LE-NEON-NEXT: vmov s16, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s16
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d11[1], r6
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: add lr, sp, #80
+; LE-NEON-NEXT: vmov.32 d10[1], r4
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #40
+; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #8
+; LE-NEON-NEXT: vmov.32 d16[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; LE-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #24
+; LE-NEON-NEXT: vmov.32 d19[1], r0
+; LE-NEON-NEXT: ldr r0, [sp, #116] @ 4-byte Reload
+; LE-NEON-NEXT: vmov.32 d21[1], r10
+; LE-NEON-NEXT: vmov.32 d18[1], r0
+; LE-NEON-NEXT: ldr r0, [sp, #112] @ 4-byte Reload
+; LE-NEON-NEXT: vmov.32 d12[1], r5
+; LE-NEON-NEXT: vmov.32 d17[1], r0
+; LE-NEON-NEXT: add r0, r11, #64
+; LE-NEON-NEXT: vmov.32 d16[1], r1
+; LE-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-NEON-NEXT: vmov.32 d20[1], r9
+; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]
+; LE-NEON-NEXT: vst1.64 {d14, d15}, [r11:128]!
+; LE-NEON-NEXT: vst1.64 {d20, d21}, [r11:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #56
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]
+; LE-NEON-NEXT: add sp, sp, #120
+; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: add sp, sp, #4
+; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v16i64_v16f16:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: .pad #4
+; BE-NEXT: sub sp, sp, #4
+; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: .pad #112
+; BE-NEXT: sub sp, sp, #112
+; BE-NEXT: mov r11, r0
+; BE-NEXT: vmov r0, s14
+; BE-NEXT: vmov.f32 s17, s15
+; BE-NEXT: vstr s13, [sp, #52] @ 4-byte Spill
+; BE-NEXT: vmov.f32 s21, s12
+; BE-NEXT: vstr s10, [sp, #68] @ 4-byte Spill
+; BE-NEXT: vmov.f32 s23, s11
+; BE-NEXT: vstr s7, [sp, #72] @ 4-byte Spill
+; BE-NEXT: vmov.f32 s19, s9
+; BE-NEXT: vstr s4, [sp, #28] @ 4-byte Spill
+; BE-NEXT: vmov.f32 s26, s8
+; BE-NEXT: vmov.f32 s24, s6
+; BE-NEXT: vmov.f32 s18, s5
+; BE-NEXT: vmov.f32 s25, s3
+; BE-NEXT: vmov.f32 s16, s2
+; BE-NEXT: vmov.f32 s27, s1
+; BE-NEXT: vmov.f32 s29, s0
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: mov r8, r0
+; BE-NEXT: vmov r0, s29
+; BE-NEXT: mov r4, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: mov r9, r0
+; BE-NEXT: vmov r0, s27
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: mov r7, r0
+; BE-NEXT: vmov r0, s21
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: mov r6, r0
+; BE-NEXT: vmov r0, s25
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: mov r5, r0
+; BE-NEXT: vmov r0, s23
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: vmov s0, r5
+; BE-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; BE-NEXT: vstr d16, [sp, #96] @ 8-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: vmov s0, r6
+; BE-NEXT: str r1, [sp, #92] @ 4-byte Spill
+; BE-NEXT: vstr d16, [sp, #80] @ 8-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: vmov s0, r7
+; BE-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; BE-NEXT: vstr d16, [sp, #56] @ 8-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov s0, r9
+; BE-NEXT: mov r10, r1
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: vmov r0, s17
+; BE-NEXT: mov r5, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: vmov.32 d10[0], r8
+; BE-NEXT: vmov r6, s19
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: mov r0, r6
+; BE-NEXT: mov r7, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: mov r6, r0
+; BE-NEXT: vmov r0, s18
+; BE-NEXT: vmov.32 d10[1], r4
+; BE-NEXT: vstr d10, [sp, #40] @ 8-byte Spill
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: mov r4, r0
+; BE-NEXT: vmov r0, s16
+; BE-NEXT: vmov.32 d11[1], r7
+; BE-NEXT: vstr d11, [sp, #32] @ 8-byte Spill
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov.32 d15[1], r5
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: vstr d15, [sp, #16] @ 8-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: vldr s0, [sp, #28] @ 4-byte Reload
+; BE-NEXT: vmov r5, s26
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: vmov s26, r4
+; BE-NEXT: vmov r0, s0
+; BE-NEXT: mov r8, r1
+; BE-NEXT: vmov.32 d14[1], r10
+; BE-NEXT: vmov r4, s24
+; BE-NEXT: vstr d16, [sp] @ 8-byte Spill
+; BE-NEXT: vstr d14, [sp, #8] @ 8-byte Spill
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov.f32 s0, s26
+; BE-NEXT: vmov s22, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s22
+; BE-NEXT: mov r7, r1
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: vmov s24, r6
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: mov r0, r4
+; BE-NEXT: mov r6, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov.f32 s0, s24
+; BE-NEXT: vmov s22, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s22
+; BE-NEXT: mov r9, r1
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: vmov.32 d14[1], r6
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: mov r0, r5
+; BE-NEXT: mov r6, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vldr s0, [sp, #52] @ 4-byte Reload
+; BE-NEXT: mov r4, r0
+; BE-NEXT: vmov.32 d13[1], r7
+; BE-NEXT: vmov r0, s0
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vldr s0, [sp, #68] @ 4-byte Reload
+; BE-NEXT: vmov s20, r0
+; BE-NEXT: vmov.32 d11[1], r6
+; BE-NEXT: vmov r7, s0
+; BE-NEXT: vldr s0, [sp, #72] @ 4-byte Reload
+; BE-NEXT: vmov r0, s0
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov.f32 s0, s20
+; BE-NEXT: vmov s16, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: vmov s18, r4
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: mov r0, r7
+; BE-NEXT: mov r4, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov.f32 s0, s18
+; BE-NEXT: vmov s16, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vmov.32 d9[0], r0
+; BE-NEXT: vmov.32 d15[1], r4
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d24[0], r0
+; BE-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; BE-NEXT: vldr d23, [sp, #56] @ 8-byte Reload
+; BE-NEXT: vldr d20, [sp, #8] @ 8-byte Reload
+; BE-NEXT: vmov.32 d23[1], r0
+; BE-NEXT: ldr r0, [sp, #92] @ 4-byte Reload
+; BE-NEXT: vldr d22, [sp, #80] @ 8-byte Reload
+; BE-NEXT: vldr d26, [sp, #16] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d21, d20
+; BE-NEXT: vmov.32 d22[1], r0
+; BE-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; BE-NEXT: vldr d30, [sp] @ 8-byte Reload
+; BE-NEXT: vldr d25, [sp, #96] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d20, d26
+; BE-NEXT: vldr d26, [sp, #32] @ 8-byte Reload
+; BE-NEXT: vmov.32 d10[1], r5
+; BE-NEXT: vmov.32 d12[1], r9
+; BE-NEXT: vldr d28, [sp, #40] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d27, d26
+; BE-NEXT: vmov.32 d25[1], r0
+; BE-NEXT: add r0, r11, #64
+; BE-NEXT: vmov.32 d30[1], r8
+; BE-NEXT: vmov.32 d9[1], r6
+; BE-NEXT: vrev64.32 d26, d28
+; BE-NEXT: vrev64.32 d29, d10
+; BE-NEXT: vmov.32 d24[1], r1
+; BE-NEXT: vrev64.32 d1, d12
+; BE-NEXT: vrev64.32 d28, d23
+; BE-NEXT: vrev64.32 d23, d22
+; BE-NEXT: vrev64.32 d22, d30
+; BE-NEXT: vrev64.32 d31, d25
+; BE-NEXT: vrev64.32 d0, d9
+; BE-NEXT: vrev64.32 d30, d24
+; BE-NEXT: vst1.64 {d0, d1}, [r0:128]!
+; BE-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-NEXT: vst1.64 {d28, d29}, [r0:128]!
+; BE-NEXT: vrev64.32 d19, d13
+; BE-NEXT: vst1.64 {d26, d27}, [r0:128]
+; BE-NEXT: vst1.64 {d20, d21}, [r11:128]!
+; BE-NEXT: vrev64.32 d18, d14
+; BE-NEXT: vst1.64 {d22, d23}, [r11:128]!
+; BE-NEXT: vrev64.32 d17, d15
+; BE-NEXT: vrev64.32 d16, d11
+; BE-NEXT: vst1.64 {d18, d19}, [r11:128]!
+; BE-NEXT: vst1.64 {d16, d17}, [r11:128]
+; BE-NEXT: add sp, sp, #112
+; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: add sp, sp, #4
+; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v16i64_v16f16:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: .pad #4
+; BE-NEON-NEXT: sub sp, sp, #4
+; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: .pad #112
+; BE-NEON-NEXT: sub sp, sp, #112
+; BE-NEON-NEXT: mov r11, r0
+; BE-NEON-NEXT: vmov r0, s14
+; BE-NEON-NEXT: vmov.f32 s17, s15
+; BE-NEON-NEXT: vstr s13, [sp, #52] @ 4-byte Spill
+; BE-NEON-NEXT: vmov.f32 s21, s12
+; BE-NEON-NEXT: vstr s10, [sp, #68] @ 4-byte Spill
+; BE-NEON-NEXT: vmov.f32 s23, s11
+; BE-NEON-NEXT: vstr s7, [sp, #72] @ 4-byte Spill
+; BE-NEON-NEXT: vmov.f32 s19, s9
+; BE-NEON-NEXT: vstr s4, [sp, #28] @ 4-byte Spill
+; BE-NEON-NEXT: vmov.f32 s26, s8
+; BE-NEON-NEXT: vmov.f32 s24, s6
+; BE-NEON-NEXT: vmov.f32 s18, s5
+; BE-NEON-NEXT: vmov.f32 s25, s3
+; BE-NEON-NEXT: vmov.f32 s16, s2
+; BE-NEON-NEXT: vmov.f32 s27, s1
+; BE-NEON-NEXT: vmov.f32 s29, s0
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: mov r8, r0
+; BE-NEON-NEXT: vmov r0, s29
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: mov r9, r0
+; BE-NEON-NEXT: vmov r0, s27
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: mov r7, r0
+; BE-NEON-NEXT: vmov r0, s21
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: mov r6, r0
+; BE-NEON-NEXT: vmov r0, s25
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: mov r5, r0
+; BE-NEON-NEXT: vmov r0, s23
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: vmov s0, r5
+; BE-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; BE-NEON-NEXT: vstr d16, [sp, #96] @ 8-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: vmov s0, r6
+; BE-NEON-NEXT: str r1, [sp, #92] @ 4-byte Spill
+; BE-NEON-NEXT: vstr d16, [sp, #80] @ 8-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: vmov s0, r7
+; BE-NEON-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; BE-NEON-NEXT: vstr d16, [sp, #56] @ 8-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov s0, r9
+; BE-NEON-NEXT: mov r10, r1
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: vmov r0, s17
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: vmov.32 d10[0], r8
+; BE-NEON-NEXT: vmov r6, s19
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: mov r0, r6
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: mov r6, r0
+; BE-NEON-NEXT: vmov r0, s18
+; BE-NEON-NEXT: vmov.32 d10[1], r4
+; BE-NEON-NEXT: vstr d10, [sp, #40] @ 8-byte Spill
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: mov r4, r0
+; BE-NEON-NEXT: vmov r0, s16
+; BE-NEON-NEXT: vmov.32 d11[1], r7
+; BE-NEON-NEXT: vstr d11, [sp, #32] @ 8-byte Spill
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov.32 d15[1], r5
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: vstr d15, [sp, #16] @ 8-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vldr s0, [sp, #28] @ 4-byte Reload
+; BE-NEON-NEXT: vmov r5, s26
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: vmov s26, r4
+; BE-NEON-NEXT: vmov r0, s0
+; BE-NEON-NEXT: mov r8, r1
+; BE-NEON-NEXT: vmov.32 d14[1], r10
+; BE-NEON-NEXT: vmov r4, s24
+; BE-NEON-NEXT: vstr d16, [sp] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d14, [sp, #8] @ 8-byte Spill
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov.f32 s0, s26
+; BE-NEON-NEXT: vmov s22, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s22
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: vmov s24, r6
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: mov r0, r4
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov.f32 s0, s24
+; BE-NEON-NEXT: vmov s22, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s22
+; BE-NEON-NEXT: mov r9, r1
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: vmov.32 d14[1], r6
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: mov r0, r5
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vldr s0, [sp, #52] @ 4-byte Reload
+; BE-NEON-NEXT: mov r4, r0
+; BE-NEON-NEXT: vmov.32 d13[1], r7
+; BE-NEON-NEXT: vmov r0, s0
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vldr s0, [sp, #68] @ 4-byte Reload
+; BE-NEON-NEXT: vmov s20, r0
+; BE-NEON-NEXT: vmov.32 d11[1], r6
+; BE-NEON-NEXT: vmov r7, s0
+; BE-NEON-NEXT: vldr s0, [sp, #72] @ 4-byte Reload
+; BE-NEON-NEXT: vmov r0, s0
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov.f32 s0, s20
+; BE-NEON-NEXT: vmov s16, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: vmov s18, r4
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: mov r0, r7
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov.f32 s0, s18
+; BE-NEON-NEXT: vmov s16, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vmov.32 d9[0], r0
+; BE-NEON-NEXT: vmov.32 d15[1], r4
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d24[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; BE-NEON-NEXT: vldr d23, [sp, #56] @ 8-byte Reload
+; BE-NEON-NEXT: vldr d20, [sp, #8] @ 8-byte Reload
+; BE-NEON-NEXT: vmov.32 d23[1], r0
+; BE-NEON-NEXT: ldr r0, [sp, #92] @ 4-byte Reload
+; BE-NEON-NEXT: vldr d22, [sp, #80] @ 8-byte Reload
+; BE-NEON-NEXT: vldr d26, [sp, #16] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d21, d20
+; BE-NEON-NEXT: vmov.32 d22[1], r0
+; BE-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; BE-NEON-NEXT: vldr d30, [sp] @ 8-byte Reload
+; BE-NEON-NEXT: vldr d25, [sp, #96] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d20, d26
+; BE-NEON-NEXT: vldr d26, [sp, #32] @ 8-byte Reload
+; BE-NEON-NEXT: vmov.32 d10[1], r5
+; BE-NEON-NEXT: vmov.32 d12[1], r9
+; BE-NEON-NEXT: vldr d28, [sp, #40] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d27, d26
+; BE-NEON-NEXT: vmov.32 d25[1], r0
+; BE-NEON-NEXT: add r0, r11, #64
+; BE-NEON-NEXT: vmov.32 d30[1], r8
+; BE-NEON-NEXT: vmov.32 d9[1], r6
+; BE-NEON-NEXT: vrev64.32 d26, d28
+; BE-NEON-NEXT: vrev64.32 d29, d10
+; BE-NEON-NEXT: vmov.32 d24[1], r1
+; BE-NEON-NEXT: vrev64.32 d1, d12
+; BE-NEON-NEXT: vrev64.32 d28, d23
+; BE-NEON-NEXT: vrev64.32 d23, d22
+; BE-NEON-NEXT: vrev64.32 d22, d30
+; BE-NEON-NEXT: vrev64.32 d31, d25
+; BE-NEON-NEXT: vrev64.32 d0, d9
+; BE-NEON-NEXT: vrev64.32 d30, d24
+; BE-NEON-NEXT: vst1.64 {d0, d1}, [r0:128]!
+; BE-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-NEON-NEXT: vst1.64 {d28, d29}, [r0:128]!
+; BE-NEON-NEXT: vrev64.32 d19, d13
+; BE-NEON-NEXT: vst1.64 {d26, d27}, [r0:128]
+; BE-NEON-NEXT: vst1.64 {d20, d21}, [r11:128]!
+; BE-NEON-NEXT: vrev64.32 d18, d14
+; BE-NEON-NEXT: vst1.64 {d22, d23}, [r11:128]!
+; BE-NEON-NEXT: vrev64.32 d17, d15
+; BE-NEON-NEXT: vrev64.32 d16, d11
+; BE-NEON-NEXT: vst1.64 {d18, d19}, [r11:128]!
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]
+; BE-NEON-NEXT: add sp, sp, #112
+; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: add sp, sp, #4
+; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x)
+ ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>)
+
+define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
+; LE-LABEL: llrint_v32i64_v32f16:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: .pad #4
+; LE-NEXT: sub sp, sp, #4
+; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: .pad #248
+; LE-NEXT: sub sp, sp, #248
+; LE-NEXT: add lr, sp, #256
+; LE-NEXT: mov r11, r0
+; LE-NEXT: vstr s15, [sp, #176] @ 4-byte Spill
+; LE-NEXT: vmov.f32 s19, s14
+; LE-NEXT: ldrh r0, [lr, #132]
+; LE-NEXT: vmov.f32 s17, s11
+; LE-NEXT: vstr s13, [sp, #196] @ 4-byte Spill
+; LE-NEXT: vstr s12, [sp, #112] @ 4-byte Spill
+; LE-NEXT: vstr s10, [sp, #136] @ 4-byte Spill
+; LE-NEXT: vstr s9, [sp, #160] @ 4-byte Spill
+; LE-NEXT: vstr s8, [sp, #200] @ 4-byte Spill
+; LE-NEXT: vstr s7, [sp, #100] @ 4-byte Spill
+; LE-NEXT: vstr s6, [sp, #116] @ 4-byte Spill
+; LE-NEXT: vstr s5, [sp, #76] @ 4-byte Spill
+; LE-NEXT: vstr s4, [sp, #120] @ 4-byte Spill
+; LE-NEXT: vstr s3, [sp, #156] @ 4-byte Spill
+; LE-NEXT: vstr s2, [sp, #192] @ 4-byte Spill
+; LE-NEXT: vstr s1, [sp, #104] @ 4-byte Spill
+; LE-NEXT: vstr s0, [sp, #108] @ 4-byte Spill
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: add lr, sp, #256
+; LE-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; LE-NEXT: str r1, [sp, #56] @ 4-byte Spill
+; LE-NEXT: ldrh r0, [lr, #108]
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: add lr, sp, #256
+; LE-NEXT: str r0, [sp, #32] @ 4-byte Spill
+; LE-NEXT: ldrh r0, [lr, #96]
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: add lr, sp, #256
+; LE-NEXT: mov r5, r0
+; LE-NEXT: ldrh r0, [lr, #100]
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: add lr, sp, #256
+; LE-NEXT: mov r7, r0
+; LE-NEXT: ldrh r0, [lr, #156]
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: add lr, sp, #256
+; LE-NEXT: mov r6, r0
+; LE-NEXT: ldrh r0, [lr, #152]
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: add lr, sp, #256
+; LE-NEXT: mov r4, r0
+; LE-NEXT: ldrh r0, [lr, #148]
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r4
+; LE-NEXT: mov r8, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r6
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r7
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r5
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: add lr, sp, #256
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: mov r5, r1
+; LE-NEXT: ldrh r0, [lr, #144]
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: add lr, sp, #256
+; LE-NEXT: mov r10, r0
+; LE-NEXT: vmov.32 d11[1], r7
+; LE-NEXT: ldrh r0, [lr, #104]
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov.32 d10[1], r5
+; LE-NEXT: add lr, sp, #80
+; LE-NEXT: mov r7, r0
+; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #256
+; LE-NEXT: ldrh r0, [lr, #124]
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: add lr, sp, #256
+; LE-NEXT: mov r5, r0
+; LE-NEXT: vmov.32 d15[1], r6
+; LE-NEXT: ldrh r0, [lr, #120]
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov.32 d14[1], r4
+; LE-NEXT: add lr, sp, #16
+; LE-NEXT: mov r6, r0
+; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #256
+; LE-NEXT: ldrh r0, [lr, #116]
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: add lr, sp, #256
+; LE-NEXT: vorr q5, q6, q6
+; LE-NEXT: mov r4, r0
+; LE-NEXT: ldrh r0, [lr, #112]
+; LE-NEXT: vmov.32 d11[1], r8
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r4
+; LE-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r6
+; LE-NEXT: add lr, sp, #216
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r5
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r7
+; LE-NEXT: add lr, sp, #232
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: mov r8, r1
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r10
+; LE-NEXT: mov r9, r1
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: add lr, sp, #256
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: mov r5, r1
+; LE-NEXT: ldrh r0, [lr, #140]
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
+; LE-NEXT: vmov.32 d10[1], r5
+; LE-NEXT: add lr, sp, #32
+; LE-NEXT: vmov s16, r0
+; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: add lr, sp, #256
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: ldrh r1, [lr, #128]
+; LE-NEXT: mov r0, r1
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov.f32 s0, s16
+; LE-NEXT: vmov s18, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: add lr, sp, #256
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
+; LE-NEXT: ldrh r0, [lr, #136]
+; LE-NEXT: vmov.32 d15[1], r6
+; LE-NEXT: vmov.32 d11[0], r1
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov.f32 s0, s18
+; LE-NEXT: vmov s16, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s16
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d11[1], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d13[1], r5
+; LE-NEXT: add lr, sp, #56
+; LE-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; LE-NEXT: vmov.32 d12[1], r9
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: vmov r0, s19
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #232
+; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT: vmov.32 d13[1], r8
+; LE-NEXT: vmov.32 d12[1], r4
+; LE-NEXT: vmov.32 d10[1], r6
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #216
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vmov.32 d17[1], r2
+; LE-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; LE-NEXT: vmov.32 d14[1], r1
+; LE-NEXT: add r1, r11, #192
+; LE-NEXT: vmov.32 d16[1], r2
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #32
+; LE-NEXT: vst1.64 {d10, d11}, [r1:128]!
+; LE-NEXT: vst1.64 {d14, d15}, [r1:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #16
+; LE-NEXT: vst1.64 {d16, d17}, [r1:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vst1.64 {d16, d17}, [r1:128]
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; LE-NEXT: vmov r0, s17
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vldr s0, [sp, #76] @ 4-byte Reload
+; LE-NEXT: mov r10, r0
+; LE-NEXT: vmov r0, s0
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vldr s0, [sp, #100] @ 4-byte Reload
+; LE-NEXT: mov r4, r0
+; LE-NEXT: vmov r0, s0
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; LE-NEXT: mov r7, r0
+; LE-NEXT: vmov r0, s0
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vldr s0, [sp, #108] @ 4-byte Reload
+; LE-NEXT: mov r5, r0
+; LE-NEXT: vmov r0, s0
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vldr s0, [sp, #112] @ 4-byte Reload
+; LE-NEXT: mov r6, r0
+; LE-NEXT: vmov r0, s0
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r6
+; LE-NEXT: str r1, [sp, #112] @ 4-byte Spill
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r5
+; LE-NEXT: mov r9, r1
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r7
+; LE-NEXT: mov r8, r1
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r4
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov s0, r10
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vldr s0, [sp, #116] @ 4-byte Reload
+; LE-NEXT: mov r6, r0
+; LE-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; LE-NEXT: vmov.32 d11[1], r5
+; LE-NEXT: vmov r0, s0
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: vmov.32 d13[1], r4
+; LE-NEXT: bl llrintf
+; LE-NEXT: vldr s0, [sp, #120] @ 4-byte Reload
+; LE-NEXT: mov r4, r0
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vmov.32 d9[1], r8
+; LE-NEXT: vmov r0, s0
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vldr s0, [sp, #136] @ 4-byte Reload
+; LE-NEXT: vmov.32 d10[0], r4
+; LE-NEXT: vmov r7, s0
+; LE-NEXT: vmov s0, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: add lr, sp, #136
+; LE-NEXT: add r10, r11, #128
+; LE-NEXT: mov r0, r7
+; LE-NEXT: vmov.32 d10[1], r5
+; LE-NEXT: vmov.32 d12[1], r1
+; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #120
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #80
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #56
+; LE-NEXT: vmov.32 d13[0], r6
+; LE-NEXT: vst1.64 {d16, d17}, [r10:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vst1.64 {d16, d17}, [r10:128]!
+; LE-NEXT: vldr s0, [sp, #156] @ 4-byte Reload
+; LE-NEXT: vmov r4, s0
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vldr s0, [sp, #160] @ 4-byte Reload
+; LE-NEXT: mov r5, r0
+; LE-NEXT: ldr r0, [sp, #52] @ 4-byte Reload
+; LE-NEXT: vmov.32 d8[1], r9
+; LE-NEXT: vmov r7, s0
+; LE-NEXT: add lr, sp, #160
+; LE-NEXT: vldr s0, [sp, #176] @ 4-byte Reload
+; LE-NEXT: vmov s20, r0
+; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT: vmov r0, s0
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov.f32 s0, s20
+; LE-NEXT: vmov s18, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s18
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: vmov s16, r5
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: mov r0, r7
+; LE-NEXT: mov r5, r1
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov.f32 s0, s16
+; LE-NEXT: vmov s18, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s18
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: add lr, sp, #176
+; LE-NEXT: mov r8, r1
+; LE-NEXT: vmov.32 d11[1], r5
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: mov r0, r4
+; LE-NEXT: mov r5, r1
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vldr s0, [sp, #196] @ 4-byte Reload
+; LE-NEXT: mov r7, r0
+; LE-NEXT: vmov.32 d10[1], r6
+; LE-NEXT: vmov r0, s0
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vldr s0, [sp, #192] @ 4-byte Reload
+; LE-NEXT: vmov s16, r0
+; LE-NEXT: vmov.32 d13[1], r5
+; LE-NEXT: vmov r6, s0
+; LE-NEXT: vldr s0, [sp, #200] @ 4-byte Reload
+; LE-NEXT: vmov r0, s0
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov.f32 s0, s16
+; LE-NEXT: vmov s18, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s18
+; LE-NEXT: add lr, sp, #200
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov s16, r7
+; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: mov r0, r6
+; LE-NEXT: mov r5, r1
+; LE-NEXT: bl __aeabi_h2f
+; LE-NEXT: vmov.f32 s0, s16
+; LE-NEXT: vmov s18, r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s18
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: vmov.32 d12[1], r5
+; LE-NEXT: bl llrintf
+; LE-NEXT: add lr, sp, #200
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: ldr r0, [sp, #112] @ 4-byte Reload
+; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #176
+; LE-NEXT: vmov.32 d19[1], r4
+; LE-NEXT: vmov.32 d18[1], r0
+; LE-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #216
+; LE-NEXT: vmov.32 d17[1], r0
+; LE-NEXT: add r0, r11, #64
+; LE-NEXT: vmov.32 d16[1], r8
+; LE-NEXT: vorr q10, q8, q8
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #232
+; LE-NEXT: vmov.32 d15[1], r6
+; LE-NEXT: vst1.64 {d16, d17}, [r10:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #160
+; LE-NEXT: vmov.32 d14[1], r1
+; LE-NEXT: vst1.64 {d16, d17}, [r10:128]
+; LE-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-NEXT: vst1.64 {d20, d21}, [r0:128]!
+; LE-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-NEXT: vst1.64 {d10, d11}, [r0:128]
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #120
+; LE-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-NEXT: vst1.64 {d14, d15}, [r11:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #136
+; LE-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vst1.64 {d16, d17}, [r11:128]
+; LE-NEXT: add sp, sp, #248
+; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: add sp, sp, #4
+; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v32i64_v32f16:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: .pad #4
+; LE-NEON-NEXT: sub sp, sp, #4
+; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: .pad #248
+; LE-NEON-NEXT: sub sp, sp, #248
+; LE-NEON-NEXT: add lr, sp, #256
+; LE-NEON-NEXT: mov r11, r0
+; LE-NEON-NEXT: vstr s15, [sp, #176] @ 4-byte Spill
+; LE-NEON-NEXT: vmov.f32 s19, s14
+; LE-NEON-NEXT: ldrh r0, [lr, #132]
+; LE-NEON-NEXT: vmov.f32 s17, s11
+; LE-NEON-NEXT: vstr s13, [sp, #196] @ 4-byte Spill
+; LE-NEON-NEXT: vstr s12, [sp, #112] @ 4-byte Spill
+; LE-NEON-NEXT: vstr s10, [sp, #136] @ 4-byte Spill
+; LE-NEON-NEXT: vstr s9, [sp, #160] @ 4-byte Spill
+; LE-NEON-NEXT: vstr s8, [sp, #200] @ 4-byte Spill
+; LE-NEON-NEXT: vstr s7, [sp, #100] @ 4-byte Spill
+; LE-NEON-NEXT: vstr s6, [sp, #116] @ 4-byte Spill
+; LE-NEON-NEXT: vstr s5, [sp, #76] @ 4-byte Spill
+; LE-NEON-NEXT: vstr s4, [sp, #120] @ 4-byte Spill
+; LE-NEON-NEXT: vstr s3, [sp, #156] @ 4-byte Spill
+; LE-NEON-NEXT: vstr s2, [sp, #192] @ 4-byte Spill
+; LE-NEON-NEXT: vstr s1, [sp, #104] @ 4-byte Spill
+; LE-NEON-NEXT: vstr s0, [sp, #108] @ 4-byte Spill
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: add lr, sp, #256
+; LE-NEON-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; LE-NEON-NEXT: str r1, [sp, #56] @ 4-byte Spill
+; LE-NEON-NEXT: ldrh r0, [lr, #108]
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: add lr, sp, #256
+; LE-NEON-NEXT: str r0, [sp, #32] @ 4-byte Spill
+; LE-NEON-NEXT: ldrh r0, [lr, #96]
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: add lr, sp, #256
+; LE-NEON-NEXT: mov r5, r0
+; LE-NEON-NEXT: ldrh r0, [lr, #100]
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: add lr, sp, #256
+; LE-NEON-NEXT: mov r7, r0
+; LE-NEON-NEXT: ldrh r0, [lr, #156]
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: add lr, sp, #256
+; LE-NEON-NEXT: mov r6, r0
+; LE-NEON-NEXT: ldrh r0, [lr, #152]
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: add lr, sp, #256
+; LE-NEON-NEXT: mov r4, r0
+; LE-NEON-NEXT: ldrh r0, [lr, #148]
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r4
+; LE-NEON-NEXT: mov r8, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r6
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r7
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r5
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: add lr, sp, #256
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: ldrh r0, [lr, #144]
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: add lr, sp, #256
+; LE-NEON-NEXT: mov r10, r0
+; LE-NEON-NEXT: vmov.32 d11[1], r7
+; LE-NEON-NEXT: ldrh r0, [lr, #104]
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov.32 d10[1], r5
+; LE-NEON-NEXT: add lr, sp, #80
+; LE-NEON-NEXT: mov r7, r0
+; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #256
+; LE-NEON-NEXT: ldrh r0, [lr, #124]
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: add lr, sp, #256
+; LE-NEON-NEXT: mov r5, r0
+; LE-NEON-NEXT: vmov.32 d15[1], r6
+; LE-NEON-NEXT: ldrh r0, [lr, #120]
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov.32 d14[1], r4
+; LE-NEON-NEXT: add lr, sp, #16
+; LE-NEON-NEXT: mov r6, r0
+; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #256
+; LE-NEON-NEXT: ldrh r0, [lr, #116]
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: add lr, sp, #256
+; LE-NEON-NEXT: vorr q5, q6, q6
+; LE-NEON-NEXT: mov r4, r0
+; LE-NEON-NEXT: ldrh r0, [lr, #112]
+; LE-NEON-NEXT: vmov.32 d11[1], r8
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r4
+; LE-NEON-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r6
+; LE-NEON-NEXT: add lr, sp, #216
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r5
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r7
+; LE-NEON-NEXT: add lr, sp, #232
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: mov r8, r1
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r10
+; LE-NEON-NEXT: mov r9, r1
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: add lr, sp, #256
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: ldrh r0, [lr, #140]
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
+; LE-NEON-NEXT: vmov.32 d10[1], r5
+; LE-NEON-NEXT: add lr, sp, #32
+; LE-NEON-NEXT: vmov s16, r0
+; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: add lr, sp, #256
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: ldrh r1, [lr, #128]
+; LE-NEON-NEXT: mov r0, r1
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov.f32 s0, s16
+; LE-NEON-NEXT: vmov s18, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: add lr, sp, #256
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
+; LE-NEON-NEXT: ldrh r0, [lr, #136]
+; LE-NEON-NEXT: vmov.32 d15[1], r6
+; LE-NEON-NEXT: vmov.32 d11[0], r1
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov.f32 s0, s18
+; LE-NEON-NEXT: vmov s16, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s16
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d11[1], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d13[1], r5
+; LE-NEON-NEXT: add lr, sp, #56
+; LE-NEON-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; LE-NEON-NEXT: vmov.32 d12[1], r9
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: vmov r0, s19
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #232
+; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d13[1], r8
+; LE-NEON-NEXT: vmov.32 d12[1], r4
+; LE-NEON-NEXT: vmov.32 d10[1], r6
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #216
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d17[1], r2
+; LE-NEON-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; LE-NEON-NEXT: vmov.32 d14[1], r1
+; LE-NEON-NEXT: add r1, r11, #192
+; LE-NEON-NEXT: vmov.32 d16[1], r2
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #32
+; LE-NEON-NEXT: vst1.64 {d10, d11}, [r1:128]!
+; LE-NEON-NEXT: vst1.64 {d14, d15}, [r1:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #16
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r1:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r1:128]
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; LE-NEON-NEXT: vmov r0, s17
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vldr s0, [sp, #76] @ 4-byte Reload
+; LE-NEON-NEXT: mov r10, r0
+; LE-NEON-NEXT: vmov r0, s0
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vldr s0, [sp, #100] @ 4-byte Reload
+; LE-NEON-NEXT: mov r4, r0
+; LE-NEON-NEXT: vmov r0, s0
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; LE-NEON-NEXT: mov r7, r0
+; LE-NEON-NEXT: vmov r0, s0
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vldr s0, [sp, #108] @ 4-byte Reload
+; LE-NEON-NEXT: mov r5, r0
+; LE-NEON-NEXT: vmov r0, s0
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vldr s0, [sp, #112] @ 4-byte Reload
+; LE-NEON-NEXT: mov r6, r0
+; LE-NEON-NEXT: vmov r0, s0
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r6
+; LE-NEON-NEXT: str r1, [sp, #112] @ 4-byte Spill
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r5
+; LE-NEON-NEXT: mov r9, r1
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r7
+; LE-NEON-NEXT: mov r8, r1
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r4
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov s0, r10
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vldr s0, [sp, #116] @ 4-byte Reload
+; LE-NEON-NEXT: mov r6, r0
+; LE-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; LE-NEON-NEXT: vmov.32 d11[1], r5
+; LE-NEON-NEXT: vmov r0, s0
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: vmov.32 d13[1], r4
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vldr s0, [sp, #120] @ 4-byte Reload
+; LE-NEON-NEXT: mov r4, r0
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vmov.32 d9[1], r8
+; LE-NEON-NEXT: vmov r0, s0
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vldr s0, [sp, #136] @ 4-byte Reload
+; LE-NEON-NEXT: vmov.32 d10[0], r4
+; LE-NEON-NEXT: vmov r7, s0
+; LE-NEON-NEXT: vmov s0, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: add lr, sp, #136
+; LE-NEON-NEXT: add r10, r11, #128
+; LE-NEON-NEXT: mov r0, r7
+; LE-NEON-NEXT: vmov.32 d10[1], r5
+; LE-NEON-NEXT: vmov.32 d12[1], r1
+; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #120
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #80
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #56
+; LE-NEON-NEXT: vmov.32 d13[0], r6
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]!
+; LE-NEON-NEXT: vldr s0, [sp, #156] @ 4-byte Reload
+; LE-NEON-NEXT: vmov r4, s0
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vldr s0, [sp, #160] @ 4-byte Reload
+; LE-NEON-NEXT: mov r5, r0
+; LE-NEON-NEXT: ldr r0, [sp, #52] @ 4-byte Reload
+; LE-NEON-NEXT: vmov.32 d8[1], r9
+; LE-NEON-NEXT: vmov r7, s0
+; LE-NEON-NEXT: add lr, sp, #160
+; LE-NEON-NEXT: vldr s0, [sp, #176] @ 4-byte Reload
+; LE-NEON-NEXT: vmov s20, r0
+; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT: vmov r0, s0
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov.f32 s0, s20
+; LE-NEON-NEXT: vmov s18, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s18
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: vmov s16, r5
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: mov r0, r7
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov.f32 s0, s16
+; LE-NEON-NEXT: vmov s18, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s18
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: add lr, sp, #176
+; LE-NEON-NEXT: mov r8, r1
+; LE-NEON-NEXT: vmov.32 d11[1], r5
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: mov r0, r4
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vldr s0, [sp, #196] @ 4-byte Reload
+; LE-NEON-NEXT: mov r7, r0
+; LE-NEON-NEXT: vmov.32 d10[1], r6
+; LE-NEON-NEXT: vmov r0, s0
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vldr s0, [sp, #192] @ 4-byte Reload
+; LE-NEON-NEXT: vmov s16, r0
+; LE-NEON-NEXT: vmov.32 d13[1], r5
+; LE-NEON-NEXT: vmov r6, s0
+; LE-NEON-NEXT: vldr s0, [sp, #200] @ 4-byte Reload
+; LE-NEON-NEXT: vmov r0, s0
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov.f32 s0, s16
+; LE-NEON-NEXT: vmov s18, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s18
+; LE-NEON-NEXT: add lr, sp, #200
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov s16, r7
+; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: mov r0, r6
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: bl __aeabi_h2f
+; LE-NEON-NEXT: vmov.f32 s0, s16
+; LE-NEON-NEXT: vmov s18, r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s18
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: vmov.32 d12[1], r5
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: add lr, sp, #200
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #112] @ 4-byte Reload
+; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #176
+; LE-NEON-NEXT: vmov.32 d19[1], r4
+; LE-NEON-NEXT: vmov.32 d18[1], r0
+; LE-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #216
+; LE-NEON-NEXT: vmov.32 d17[1], r0
+; LE-NEON-NEXT: add r0, r11, #64
+; LE-NEON-NEXT: vmov.32 d16[1], r8
+; LE-NEON-NEXT: vorr q10, q8, q8
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #232
+; LE-NEON-NEXT: vmov.32 d15[1], r6
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #160
+; LE-NEON-NEXT: vmov.32 d14[1], r1
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]
+; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]!
+; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #120
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-NEON-NEXT: vst1.64 {d14, d15}, [r11:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #136
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]
+; LE-NEON-NEXT: add sp, sp, #248
+; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: add sp, sp, #4
+; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v32i64_v32f16:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: .pad #4
+; BE-NEXT: sub sp, sp, #4
+; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: .pad #176
+; BE-NEXT: sub sp, sp, #176
+; BE-NEXT: add lr, sp, #256
+; BE-NEXT: mov r10, r0
+; BE-NEXT: vstr s15, [sp, #112] @ 4-byte Spill
+; BE-NEXT: ldrh r0, [lr, #74]
+; BE-NEXT: vstr s14, [sp, #80] @ 4-byte Spill
+; BE-NEXT: vstr s13, [sp, #48] @ 4-byte Spill
+; BE-NEXT: vstr s12, [sp, #148] @ 4-byte Spill
+; BE-NEXT: vstr s11, [sp, #76] @ 4-byte Spill
+; BE-NEXT: vstr s10, [sp, #152] @ 4-byte Spill
+; BE-NEXT: vstr s9, [sp, #156] @ 4-byte Spill
+; BE-NEXT: vstr s8, [sp, #120] @ 4-byte Spill
+; BE-NEXT: vstr s7, [sp, #136] @ 4-byte Spill
+; BE-NEXT: vstr s6, [sp, #132] @ 4-byte Spill
+; BE-NEXT: vstr s5, [sp, #144] @ 4-byte Spill
+; BE-NEXT: vstr s4, [sp, #64] @ 4-byte Spill
+; BE-NEXT: vstr s3, [sp, #104] @ 4-byte Spill
+; BE-NEXT: vstr s2, [sp, #88] @ 4-byte Spill
+; BE-NEXT: vstr s1, [sp, #56] @ 4-byte Spill
+; BE-NEXT: vstr s0, [sp, #96] @ 4-byte Spill
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: add lr, sp, #256
+; BE-NEXT: mov r9, r0
+; BE-NEXT: mov r8, r1
+; BE-NEXT: ldrh r0, [lr, #62]
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: add lr, sp, #256
+; BE-NEXT: mov r6, r0
+; BE-NEXT: ldrh r0, [lr, #58]
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: add lr, sp, #256
+; BE-NEXT: mov r7, r0
+; BE-NEXT: ldrh r0, [lr, #66]
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: add lr, sp, #256
+; BE-NEXT: mov r4, r0
+; BE-NEXT: ldrh r0, [lr, #54]
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: add lr, sp, #256
+; BE-NEXT: mov r5, r0
+; BE-NEXT: ldrh r0, [lr, #50]
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: vmov s0, r5
+; BE-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; BE-NEXT: vstr d16, [sp, #168] @ 8-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: vmov s0, r4
+; BE-NEXT: str r1, [sp, #40] @ 4-byte Spill
+; BE-NEXT: vstr d16, [sp, #160] @ 8-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: vmov s0, r7
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vstr d16, [sp, #32] @ 8-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: vmov s0, r6
+; BE-NEXT: mov r11, r1
+; BE-NEXT: vstr d16, [sp, #24] @ 8-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: add lr, sp, #256
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: mov r6, r1
+; BE-NEXT: ldrh r0, [lr, #34]
+; BE-NEXT: vstr d16, [sp, #16] @ 8-byte Spill
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: vmov.32 d8[0], r9
+; BE-NEXT: bl llrintf
+; BE-NEXT: add lr, sp, #256
+; BE-NEXT: mov r7, r1
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: ldrh r1, [lr, #38]
+; BE-NEXT: mov r0, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov.32 d8[1], r8
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: vstr d8, [sp, #8] @ 8-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: add lr, sp, #256
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: ldrh r1, [lr, #26]
+; BE-NEXT: mov r0, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: vmov.32 d12[1], r7
+; BE-NEXT: bl llrintf
+; BE-NEXT: add lr, sp, #256
+; BE-NEXT: mov r7, r1
+; BE-NEXT: vmov.32 d9[0], r0
+; BE-NEXT: ldrh r1, [lr, #30]
+; BE-NEXT: mov r0, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: vmov.32 d13[1], r5
+; BE-NEXT: bl llrintf
+; BE-NEXT: add lr, sp, #256
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: ldrh r1, [lr, #78]
+; BE-NEXT: mov r0, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: vmov.32 d9[1], r7
+; BE-NEXT: bl llrintf
+; BE-NEXT: add lr, sp, #256
+; BE-NEXT: mov r7, r1
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: ldrh r1, [lr, #82]
+; BE-NEXT: mov r0, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: vmov.32 d15[1], r5
+; BE-NEXT: bl llrintf
+; BE-NEXT: add lr, sp, #256
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vmov.32 d8[0], r0
+; BE-NEXT: ldrh r1, [lr, #86]
+; BE-NEXT: mov r0, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: vmov.32 d14[1], r7
+; BE-NEXT: bl llrintf
+; BE-NEXT: add lr, sp, #256
+; BE-NEXT: mov r7, r1
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: ldrh r1, [lr, #70]
+; BE-NEXT: mov r0, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: vmov.32 d8[1], r5
+; BE-NEXT: bl llrintf
+; BE-NEXT: add lr, sp, #256
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: ldrh r1, [lr, #46]
+; BE-NEXT: mov r0, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: vmov.32 d10[1], r7
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d25[0], r0
+; BE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; BE-NEXT: ldr r2, [sp, #44] @ 4-byte Reload
+; BE-NEXT: vldr d24, [sp, #160] @ 8-byte Reload
+; BE-NEXT: vldr s0, [sp, #48] @ 4-byte Reload
+; BE-NEXT: vmov.32 d24[1], r0
+; BE-NEXT: vmov r0, s0
+; BE-NEXT: vldr d26, [sp, #16] @ 8-byte Reload
+; BE-NEXT: vstr d24, [sp, #160] @ 8-byte Spill
+; BE-NEXT: vldr d24, [sp, #8] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d23, d14
+; BE-NEXT: vldr d29, [sp, #24] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d22, d24
+; BE-NEXT: vldr d24, [sp, #168] @ 8-byte Reload
+; BE-NEXT: vmov.32 d26[1], r6
+; BE-NEXT: vldr d28, [sp, #32] @ 8-byte Reload
+; BE-NEXT: vmov.32 d25[1], r1
+; BE-NEXT: add r1, r10, #192
+; BE-NEXT: vmov.32 d29[1], r11
+; BE-NEXT: add r11, r10, #128
+; BE-NEXT: vmov.32 d24[1], r2
+; BE-NEXT: vmov.32 d11[1], r5
+; BE-NEXT: vmov.32 d28[1], r4
+; BE-NEXT: vrev64.32 d27, d26
+; BE-NEXT: vstr d24, [sp, #168] @ 8-byte Spill
+; BE-NEXT: vstr d25, [sp, #48] @ 8-byte Spill
+; BE-NEXT: vrev64.32 d25, d11
+; BE-NEXT: vrev64.32 d26, d29
+; BE-NEXT: vrev64.32 d24, d28
+; BE-NEXT: vst1.64 {d26, d27}, [r1:128]!
+; BE-NEXT: vst1.64 {d24, d25}, [r1:128]!
+; BE-NEXT: vrev64.32 d21, d10
+; BE-NEXT: vrev64.32 d19, d15
+; BE-NEXT: vrev64.32 d17, d13
+; BE-NEXT: vrev64.32 d20, d8
+; BE-NEXT: vst1.64 {d22, d23}, [r1:128]!
+; BE-NEXT: vrev64.32 d18, d9
+; BE-NEXT: vrev64.32 d16, d12
+; BE-NEXT: vst1.64 {d20, d21}, [r1:128]
+; BE-NEXT: vst1.64 {d18, d19}, [r11:128]!
+; BE-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: add lr, sp, #256
+; BE-NEXT: mov r7, r0
+; BE-NEXT: mov r8, r1
+; BE-NEXT: ldrh r0, [lr, #42]
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vldr s0, [sp, #56] @ 4-byte Reload
+; BE-NEXT: mov r4, r0
+; BE-NEXT: vmov r0, s0
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov s0, r4
+; BE-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vldr s0, [sp, #64] @ 4-byte Reload
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: vmov r2, s0
+; BE-NEXT: vldr s0, [sp, #80] @ 4-byte Reload
+; BE-NEXT: vmov.32 d16[1], r1
+; BE-NEXT: vmov r4, s0
+; BE-NEXT: vldr s0, [sp, #76] @ 4-byte Reload
+; BE-NEXT: vstr d16, [sp, #80] @ 8-byte Spill
+; BE-NEXT: vmov r5, s0
+; BE-NEXT: mov r0, r2
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: mov r0, r4
+; BE-NEXT: mov r9, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov s0, r0
+; BE-NEXT: vmov.32 d8[0], r7
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: mov r0, r5
+; BE-NEXT: mov r6, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vldr s0, [sp, #88] @ 4-byte Reload
+; BE-NEXT: mov r4, r0
+; BE-NEXT: vmov.32 d8[1], r8
+; BE-NEXT: vmov r7, s0
+; BE-NEXT: vldr s0, [sp, #96] @ 4-byte Reload
+; BE-NEXT: vstr d8, [sp, #88] @ 8-byte Spill
+; BE-NEXT: vmov r0, s0
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; BE-NEXT: vmov s19, r0
+; BE-NEXT: vmov.32 d12[1], r6
+; BE-NEXT: vmov r5, s0
+; BE-NEXT: vldr s0, [sp, #112] @ 4-byte Reload
+; BE-NEXT: vstr d12, [sp, #104] @ 8-byte Spill
+; BE-NEXT: vmov r0, s0
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov.f32 s0, s19
+; BE-NEXT: vmov s30, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s30
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vmov.32 d9[0], r0
+; BE-NEXT: vmov s17, r4
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: mov r0, r5
+; BE-NEXT: mov r4, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: vmov s30, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s30
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; BE-NEXT: vmov.32 d12[1], r4
+; BE-NEXT: vstr d16, [sp, #64] @ 8-byte Spill
+; BE-NEXT: vstr d12, [sp, #112] @ 8-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: mov r0, r7
+; BE-NEXT: mov r8, r1
+; BE-NEXT: vmov.32 d9[1], r6
+; BE-NEXT: vstr d16, [sp, #56] @ 8-byte Spill
+; BE-NEXT: vstr d9, [sp, #96] @ 8-byte Spill
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vldr s0, [sp, #120] @ 4-byte Reload
+; BE-NEXT: mov r5, r0
+; BE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; BE-NEXT: vmov r7, s0
+; BE-NEXT: vldr s0, [sp, #132] @ 4-byte Reload
+; BE-NEXT: vmov.32 d10[1], r0
+; BE-NEXT: vmov r0, s0
+; BE-NEXT: vstr d10, [sp, #120] @ 8-byte Spill
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vldr s0, [sp, #136] @ 4-byte Reload
+; BE-NEXT: vmov s26, r0
+; BE-NEXT: vmov.32 d11[1], r9
+; BE-NEXT: vmov r4, s0
+; BE-NEXT: vldr s0, [sp, #144] @ 4-byte Reload
+; BE-NEXT: vstr d11, [sp, #136] @ 8-byte Spill
+; BE-NEXT: vmov r0, s0
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov.f32 s0, s26
+; BE-NEXT: vmov s22, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s22
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: vmov s24, r5
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: mov r0, r4
+; BE-NEXT: mov r5, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov.f32 s0, s24
+; BE-NEXT: vmov s22, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s22
+; BE-NEXT: mov r9, r1
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: vmov.32 d14[1], r5
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: mov r0, r7
+; BE-NEXT: mov r5, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vldr s0, [sp, #148] @ 4-byte Reload
+; BE-NEXT: mov r7, r0
+; BE-NEXT: vmov.32 d13[1], r6
+; BE-NEXT: vmov r0, s0
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vldr s0, [sp, #152] @ 4-byte Reload
+; BE-NEXT: vmov s20, r0
+; BE-NEXT: vmov.32 d11[1], r5
+; BE-NEXT: vmov r4, s0
+; BE-NEXT: vldr s0, [sp, #156] @ 4-byte Reload
+; BE-NEXT: vmov r0, s0
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov.f32 s0, s20
+; BE-NEXT: vmov s16, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: vmov s18, r7
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: mov r0, r4
+; BE-NEXT: mov r5, r1
+; BE-NEXT: bl __aeabi_h2f
+; BE-NEXT: vmov.f32 s0, s18
+; BE-NEXT: vmov s16, r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d9[0], r0
+; BE-NEXT: vmov.32 d15[1], r5
+; BE-NEXT: bl llrintf
+; BE-NEXT: vldr d16, [sp, #160] @ 8-byte Reload
+; BE-NEXT: vldr d20, [sp, #136] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d19, d14
+; BE-NEXT: vrev64.32 d31, d16
+; BE-NEXT: vldr d16, [sp, #168] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d18, d20
+; BE-NEXT: vldr d20, [sp, #120] @ 8-byte Reload
+; BE-NEXT: vldr d22, [sp, #96] @ 8-byte Reload
+; BE-NEXT: vmov.32 d28[0], r0
+; BE-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; BE-NEXT: vrev64.32 d21, d20
+; BE-NEXT: vrev64.32 d30, d16
+; BE-NEXT: vldr d16, [sp, #48] @ 8-byte Reload
+; BE-NEXT: vldr d23, [sp, #64] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d20, d22
+; BE-NEXT: vldr d22, [sp, #112] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d1, d16
+; BE-NEXT: vldr d16, [sp, #80] @ 8-byte Reload
+; BE-NEXT: vmov.32 d23[1], r0
+; BE-NEXT: add r0, r10, #64
+; BE-NEXT: vrev64.32 d25, d22
+; BE-NEXT: vldr d22, [sp, #104] @ 8-byte Reload
+; BE-NEXT: vmov.32 d9[1], r4
+; BE-NEXT: vrev64.32 d0, d16
+; BE-NEXT: vmov.32 d28[1], r1
+; BE-NEXT: vldr d29, [sp, #56] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d3, d15
+; BE-NEXT: vrev64.32 d24, d22
+; BE-NEXT: vldr d22, [sp, #88] @ 8-byte Reload
+; BE-NEXT: vmov.32 d10[1], r6
+; BE-NEXT: vrev64.32 d5, d23
+; BE-NEXT: vst1.64 {d0, d1}, [r11:128]!
+; BE-NEXT: vrev64.32 d2, d9
+; BE-NEXT: vrev64.32 d27, d22
+; BE-NEXT: vmov.32 d29[1], r8
+; BE-NEXT: vrev64.32 d4, d28
+; BE-NEXT: vst1.64 {d30, d31}, [r11:128]
+; BE-NEXT: vst1.64 {d2, d3}, [r0:128]!
+; BE-NEXT: vmov.32 d12[1], r9
+; BE-NEXT: vrev64.32 d26, d10
+; BE-NEXT: vst1.64 {d4, d5}, [r0:128]!
+; BE-NEXT: vrev64.32 d23, d29
+; BE-NEXT: vst1.64 {d26, d27}, [r0:128]!
+; BE-NEXT: vrev64.32 d22, d12
+; BE-NEXT: vst1.64 {d24, d25}, [r0:128]
+; BE-NEXT: vst1.64 {d20, d21}, [r10:128]!
+; BE-NEXT: vst1.64 {d22, d23}, [r10:128]!
+; BE-NEXT: vrev64.32 d17, d11
+; BE-NEXT: vrev64.32 d16, d13
+; BE-NEXT: vst1.64 {d18, d19}, [r10:128]!
+; BE-NEXT: vst1.64 {d16, d17}, [r10:128]
+; BE-NEXT: add sp, sp, #176
+; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: add sp, sp, #4
+; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v32i64_v32f16:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: .pad #4
+; BE-NEON-NEXT: sub sp, sp, #4
+; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: .pad #176
+; BE-NEON-NEXT: sub sp, sp, #176
+; BE-NEON-NEXT: add lr, sp, #256
+; BE-NEON-NEXT: mov r10, r0
+; BE-NEON-NEXT: vstr s15, [sp, #112] @ 4-byte Spill
+; BE-NEON-NEXT: ldrh r0, [lr, #74]
+; BE-NEON-NEXT: vstr s14, [sp, #80] @ 4-byte Spill
+; BE-NEON-NEXT: vstr s13, [sp, #48] @ 4-byte Spill
+; BE-NEON-NEXT: vstr s12, [sp, #148] @ 4-byte Spill
+; BE-NEON-NEXT: vstr s11, [sp, #76] @ 4-byte Spill
+; BE-NEON-NEXT: vstr s10, [sp, #152] @ 4-byte Spill
+; BE-NEON-NEXT: vstr s9, [sp, #156] @ 4-byte Spill
+; BE-NEON-NEXT: vstr s8, [sp, #120] @ 4-byte Spill
+; BE-NEON-NEXT: vstr s7, [sp, #136] @ 4-byte Spill
+; BE-NEON-NEXT: vstr s6, [sp, #132] @ 4-byte Spill
+; BE-NEON-NEXT: vstr s5, [sp, #144] @ 4-byte Spill
+; BE-NEON-NEXT: vstr s4, [sp, #64] @ 4-byte Spill
+; BE-NEON-NEXT: vstr s3, [sp, #104] @ 4-byte Spill
+; BE-NEON-NEXT: vstr s2, [sp, #88] @ 4-byte Spill
+; BE-NEON-NEXT: vstr s1, [sp, #56] @ 4-byte Spill
+; BE-NEON-NEXT: vstr s0, [sp, #96] @ 4-byte Spill
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: add lr, sp, #256
+; BE-NEON-NEXT: mov r9, r0
+; BE-NEON-NEXT: mov r8, r1
+; BE-NEON-NEXT: ldrh r0, [lr, #62]
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: add lr, sp, #256
+; BE-NEON-NEXT: mov r6, r0
+; BE-NEON-NEXT: ldrh r0, [lr, #58]
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: add lr, sp, #256
+; BE-NEON-NEXT: mov r7, r0
+; BE-NEON-NEXT: ldrh r0, [lr, #66]
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: add lr, sp, #256
+; BE-NEON-NEXT: mov r4, r0
+; BE-NEON-NEXT: ldrh r0, [lr, #54]
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: add lr, sp, #256
+; BE-NEON-NEXT: mov r5, r0
+; BE-NEON-NEXT: ldrh r0, [lr, #50]
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: vmov s0, r5
+; BE-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; BE-NEON-NEXT: vstr d16, [sp, #168] @ 8-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: vmov s0, r4
+; BE-NEON-NEXT: str r1, [sp, #40] @ 4-byte Spill
+; BE-NEON-NEXT: vstr d16, [sp, #160] @ 8-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: vmov s0, r7
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vstr d16, [sp, #32] @ 8-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: vmov s0, r6
+; BE-NEON-NEXT: mov r11, r1
+; BE-NEON-NEXT: vstr d16, [sp, #24] @ 8-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: add lr, sp, #256
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: ldrh r0, [lr, #34]
+; BE-NEON-NEXT: vstr d16, [sp, #16] @ 8-byte Spill
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: vmov.32 d8[0], r9
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: add lr, sp, #256
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: ldrh r1, [lr, #38]
+; BE-NEON-NEXT: mov r0, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov.32 d8[1], r8
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: vstr d8, [sp, #8] @ 8-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: add lr, sp, #256
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: ldrh r1, [lr, #26]
+; BE-NEON-NEXT: mov r0, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: vmov.32 d12[1], r7
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: add lr, sp, #256
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: vmov.32 d9[0], r0
+; BE-NEON-NEXT: ldrh r1, [lr, #30]
+; BE-NEON-NEXT: mov r0, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: vmov.32 d13[1], r5
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: add lr, sp, #256
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: ldrh r1, [lr, #78]
+; BE-NEON-NEXT: mov r0, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: vmov.32 d9[1], r7
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: add lr, sp, #256
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: ldrh r1, [lr, #82]
+; BE-NEON-NEXT: mov r0, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: vmov.32 d15[1], r5
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: add lr, sp, #256
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vmov.32 d8[0], r0
+; BE-NEON-NEXT: ldrh r1, [lr, #86]
+; BE-NEON-NEXT: mov r0, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: vmov.32 d14[1], r7
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: add lr, sp, #256
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: ldrh r1, [lr, #70]
+; BE-NEON-NEXT: mov r0, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: vmov.32 d8[1], r5
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: add lr, sp, #256
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: ldrh r1, [lr, #46]
+; BE-NEON-NEXT: mov r0, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: vmov.32 d10[1], r7
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d25[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; BE-NEON-NEXT: ldr r2, [sp, #44] @ 4-byte Reload
+; BE-NEON-NEXT: vldr d24, [sp, #160] @ 8-byte Reload
+; BE-NEON-NEXT: vldr s0, [sp, #48] @ 4-byte Reload
+; BE-NEON-NEXT: vmov.32 d24[1], r0
+; BE-NEON-NEXT: vmov r0, s0
+; BE-NEON-NEXT: vldr d26, [sp, #16] @ 8-byte Reload
+; BE-NEON-NEXT: vstr d24, [sp, #160] @ 8-byte Spill
+; BE-NEON-NEXT: vldr d24, [sp, #8] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d23, d14
+; BE-NEON-NEXT: vldr d29, [sp, #24] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d22, d24
+; BE-NEON-NEXT: vldr d24, [sp, #168] @ 8-byte Reload
+; BE-NEON-NEXT: vmov.32 d26[1], r6
+; BE-NEON-NEXT: vldr d28, [sp, #32] @ 8-byte Reload
+; BE-NEON-NEXT: vmov.32 d25[1], r1
+; BE-NEON-NEXT: add r1, r10, #192
+; BE-NEON-NEXT: vmov.32 d29[1], r11
+; BE-NEON-NEXT: add r11, r10, #128
+; BE-NEON-NEXT: vmov.32 d24[1], r2
+; BE-NEON-NEXT: vmov.32 d11[1], r5
+; BE-NEON-NEXT: vmov.32 d28[1], r4
+; BE-NEON-NEXT: vrev64.32 d27, d26
+; BE-NEON-NEXT: vstr d24, [sp, #168] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d25, [sp, #48] @ 8-byte Spill
+; BE-NEON-NEXT: vrev64.32 d25, d11
+; BE-NEON-NEXT: vrev64.32 d26, d29
+; BE-NEON-NEXT: vrev64.32 d24, d28
+; BE-NEON-NEXT: vst1.64 {d26, d27}, [r1:128]!
+; BE-NEON-NEXT: vst1.64 {d24, d25}, [r1:128]!
+; BE-NEON-NEXT: vrev64.32 d21, d10
+; BE-NEON-NEXT: vrev64.32 d19, d15
+; BE-NEON-NEXT: vrev64.32 d17, d13
+; BE-NEON-NEXT: vrev64.32 d20, d8
+; BE-NEON-NEXT: vst1.64 {d22, d23}, [r1:128]!
+; BE-NEON-NEXT: vrev64.32 d18, d9
+; BE-NEON-NEXT: vrev64.32 d16, d12
+; BE-NEON-NEXT: vst1.64 {d20, d21}, [r1:128]
+; BE-NEON-NEXT: vst1.64 {d18, d19}, [r11:128]!
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: add lr, sp, #256
+; BE-NEON-NEXT: mov r7, r0
+; BE-NEON-NEXT: mov r8, r1
+; BE-NEON-NEXT: ldrh r0, [lr, #42]
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vldr s0, [sp, #56] @ 4-byte Reload
+; BE-NEON-NEXT: mov r4, r0
+; BE-NEON-NEXT: vmov r0, s0
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov s0, r4
+; BE-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vldr s0, [sp, #64] @ 4-byte Reload
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: vmov r2, s0
+; BE-NEON-NEXT: vldr s0, [sp, #80] @ 4-byte Reload
+; BE-NEON-NEXT: vmov.32 d16[1], r1
+; BE-NEON-NEXT: vmov r4, s0
+; BE-NEON-NEXT: vldr s0, [sp, #76] @ 4-byte Reload
+; BE-NEON-NEXT: vstr d16, [sp, #80] @ 8-byte Spill
+; BE-NEON-NEXT: vmov r5, s0
+; BE-NEON-NEXT: mov r0, r2
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: mov r0, r4
+; BE-NEON-NEXT: mov r9, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov s0, r0
+; BE-NEON-NEXT: vmov.32 d8[0], r7
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: mov r0, r5
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vldr s0, [sp, #88] @ 4-byte Reload
+; BE-NEON-NEXT: mov r4, r0
+; BE-NEON-NEXT: vmov.32 d8[1], r8
+; BE-NEON-NEXT: vmov r7, s0
+; BE-NEON-NEXT: vldr s0, [sp, #96] @ 4-byte Reload
+; BE-NEON-NEXT: vstr d8, [sp, #88] @ 8-byte Spill
+; BE-NEON-NEXT: vmov r0, s0
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; BE-NEON-NEXT: vmov s19, r0
+; BE-NEON-NEXT: vmov.32 d12[1], r6
+; BE-NEON-NEXT: vmov r5, s0
+; BE-NEON-NEXT: vldr s0, [sp, #112] @ 4-byte Reload
+; BE-NEON-NEXT: vstr d12, [sp, #104] @ 8-byte Spill
+; BE-NEON-NEXT: vmov r0, s0
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov.f32 s0, s19
+; BE-NEON-NEXT: vmov s30, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s30
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vmov.32 d9[0], r0
+; BE-NEON-NEXT: vmov s17, r4
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: mov r0, r5
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: vmov s30, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s30
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; BE-NEON-NEXT: vmov.32 d12[1], r4
+; BE-NEON-NEXT: vstr d16, [sp, #64] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d12, [sp, #112] @ 8-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: mov r0, r7
+; BE-NEON-NEXT: mov r8, r1
+; BE-NEON-NEXT: vmov.32 d9[1], r6
+; BE-NEON-NEXT: vstr d16, [sp, #56] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d9, [sp, #96] @ 8-byte Spill
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vldr s0, [sp, #120] @ 4-byte Reload
+; BE-NEON-NEXT: mov r5, r0
+; BE-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; BE-NEON-NEXT: vmov r7, s0
+; BE-NEON-NEXT: vldr s0, [sp, #132] @ 4-byte Reload
+; BE-NEON-NEXT: vmov.32 d10[1], r0
+; BE-NEON-NEXT: vmov r0, s0
+; BE-NEON-NEXT: vstr d10, [sp, #120] @ 8-byte Spill
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vldr s0, [sp, #136] @ 4-byte Reload
+; BE-NEON-NEXT: vmov s26, r0
+; BE-NEON-NEXT: vmov.32 d11[1], r9
+; BE-NEON-NEXT: vmov r4, s0
+; BE-NEON-NEXT: vldr s0, [sp, #144] @ 4-byte Reload
+; BE-NEON-NEXT: vstr d11, [sp, #136] @ 8-byte Spill
+; BE-NEON-NEXT: vmov r0, s0
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov.f32 s0, s26
+; BE-NEON-NEXT: vmov s22, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s22
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: vmov s24, r5
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: mov r0, r4
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov.f32 s0, s24
+; BE-NEON-NEXT: vmov s22, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s22
+; BE-NEON-NEXT: mov r9, r1
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: vmov.32 d14[1], r5
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: mov r0, r7
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vldr s0, [sp, #148] @ 4-byte Reload
+; BE-NEON-NEXT: mov r7, r0
+; BE-NEON-NEXT: vmov.32 d13[1], r6
+; BE-NEON-NEXT: vmov r0, s0
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vldr s0, [sp, #152] @ 4-byte Reload
+; BE-NEON-NEXT: vmov s20, r0
+; BE-NEON-NEXT: vmov.32 d11[1], r5
+; BE-NEON-NEXT: vmov r4, s0
+; BE-NEON-NEXT: vldr s0, [sp, #156] @ 4-byte Reload
+; BE-NEON-NEXT: vmov r0, s0
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov.f32 s0, s20
+; BE-NEON-NEXT: vmov s16, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: vmov s18, r7
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: mov r0, r4
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: bl __aeabi_h2f
+; BE-NEON-NEXT: vmov.f32 s0, s18
+; BE-NEON-NEXT: vmov s16, r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d9[0], r0
+; BE-NEON-NEXT: vmov.32 d15[1], r5
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vldr d16, [sp, #160] @ 8-byte Reload
+; BE-NEON-NEXT: vldr d20, [sp, #136] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d19, d14
+; BE-NEON-NEXT: vrev64.32 d31, d16
+; BE-NEON-NEXT: vldr d16, [sp, #168] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d18, d20
+; BE-NEON-NEXT: vldr d20, [sp, #120] @ 8-byte Reload
+; BE-NEON-NEXT: vldr d22, [sp, #96] @ 8-byte Reload
+; BE-NEON-NEXT: vmov.32 d28[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; BE-NEON-NEXT: vrev64.32 d21, d20
+; BE-NEON-NEXT: vrev64.32 d30, d16
+; BE-NEON-NEXT: vldr d16, [sp, #48] @ 8-byte Reload
+; BE-NEON-NEXT: vldr d23, [sp, #64] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d20, d22
+; BE-NEON-NEXT: vldr d22, [sp, #112] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d1, d16
+; BE-NEON-NEXT: vldr d16, [sp, #80] @ 8-byte Reload
+; BE-NEON-NEXT: vmov.32 d23[1], r0
+; BE-NEON-NEXT: add r0, r10, #64
+; BE-NEON-NEXT: vrev64.32 d25, d22
+; BE-NEON-NEXT: vldr d22, [sp, #104] @ 8-byte Reload
+; BE-NEON-NEXT: vmov.32 d9[1], r4
+; BE-NEON-NEXT: vrev64.32 d0, d16
+; BE-NEON-NEXT: vmov.32 d28[1], r1
+; BE-NEON-NEXT: vldr d29, [sp, #56] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d3, d15
+; BE-NEON-NEXT: vrev64.32 d24, d22
+; BE-NEON-NEXT: vldr d22, [sp, #88] @ 8-byte Reload
+; BE-NEON-NEXT: vmov.32 d10[1], r6
+; BE-NEON-NEXT: vrev64.32 d5, d23
+; BE-NEON-NEXT: vst1.64 {d0, d1}, [r11:128]!
+; BE-NEON-NEXT: vrev64.32 d2, d9
+; BE-NEON-NEXT: vrev64.32 d27, d22
+; BE-NEON-NEXT: vmov.32 d29[1], r8
+; BE-NEON-NEXT: vrev64.32 d4, d28
+; BE-NEON-NEXT: vst1.64 {d30, d31}, [r11:128]
+; BE-NEON-NEXT: vst1.64 {d2, d3}, [r0:128]!
+; BE-NEON-NEXT: vmov.32 d12[1], r9
+; BE-NEON-NEXT: vrev64.32 d26, d10
+; BE-NEON-NEXT: vst1.64 {d4, d5}, [r0:128]!
+; BE-NEON-NEXT: vrev64.32 d23, d29
+; BE-NEON-NEXT: vst1.64 {d26, d27}, [r0:128]!
+; BE-NEON-NEXT: vrev64.32 d22, d12
+; BE-NEON-NEXT: vst1.64 {d24, d25}, [r0:128]
+; BE-NEON-NEXT: vst1.64 {d20, d21}, [r10:128]!
+; BE-NEON-NEXT: vst1.64 {d22, d23}, [r10:128]!
+; BE-NEON-NEXT: vrev64.32 d17, d11
+; BE-NEON-NEXT: vrev64.32 d16, d13
+; BE-NEON-NEXT: vst1.64 {d18, d19}, [r10:128]!
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]
+; BE-NEON-NEXT: add sp, sp, #176
+; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: add sp, sp, #4
+; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half> %x)
+ ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>)
+
+; llrint of a single-element float vector is scalarized: the float is already
+; in s0, one call to llrintf returns the i64 in the r0:r1 pair, and the two
+; halves are inserted into a d-register. Big-endian runs additionally emit a
+; vrev64.32 so the 32-bit lanes land in big-endian order before returning d0.
+; NOTE(review): the CHECK lines look machine-generated (update_llc_test_checks
+; style per the patch description) — regenerate them rather than hand-editing.
+define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
+; LE-LABEL: llrint_v1i64_v1f32:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r11, lr}
+; LE-NEXT:    push {r11, lr}
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d0[0], r0
+; LE-NEXT:    vmov.32 d0[1], r1
+; LE-NEXT:    pop {r11, pc}
+;
+; LE-NEON-LABEL: llrint_v1i64_v1f32:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r11, lr}
+; LE-NEON-NEXT:    push {r11, lr}
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d0[0], r0
+; LE-NEON-NEXT:    vmov.32 d0[1], r1
+; LE-NEON-NEXT:    pop {r11, pc}
+;
+; BE-LABEL: llrint_v1i64_v1f32:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r11, lr}
+; BE-NEXT:    push {r11, lr}
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov.32 d16[1], r1
+; BE-NEXT:    vrev64.32 d0, d16
+; BE-NEXT:    pop {r11, pc}
+;
+; BE-NEON-LABEL: llrint_v1i64_v1f32:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r11, lr}
+; BE-NEON-NEXT:    push {r11, lr}
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-NEON-NEXT:    vrev64.32 d0, d16
+; BE-NEON-NEXT:    pop {r11, pc}
+  %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x)
+  ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
+
+; llrint of <2 x float> is scalarized into two llrintf libcalls; each i64
+; result (r0:r1) is packed lane-by-lane into d10/d11 and returned as q5.
+; One source lane (s17) is saved across the first call in the callee-saved
+; d8 before the second call clobbers the argument registers. Big-endian runs
+; add a vrev64.32 on the incoming d0 (to recover little-endian lane order
+; for the extracts) and a vrev64.32 on the q-register result before return.
+; NOTE(review): the CHECK lines look machine-generated (update_llc_test_checks
+; style per the patch description) — regenerate them rather than hand-editing.
+define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
+; LE-LABEL: llrint_v2i64_v2f32:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, lr}
+; LE-NEXT:    push {r4, lr}
+; LE-NEXT:    .vsave {d10, d11}
+; LE-NEXT:    vpush {d10, d11}
+; LE-NEXT:    .vsave {d8}
+; LE-NEXT:    vpush {d8}
+; LE-NEXT:    vmov.f64 d8, d0
+; LE-NEXT:    vmov.f32 s0, s17
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s16
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    vmov.32 d11[1], r4
+; LE-NEXT:    vmov.32 d10[1], r1
+; LE-NEXT:    vorr q0, q5, q5
+; LE-NEXT:    vpop {d8}
+; LE-NEXT:    vpop {d10, d11}
+; LE-NEXT:    pop {r4, pc}
+;
+; LE-NEON-LABEL: llrint_v2i64_v2f32:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, lr}
+; LE-NEON-NEXT:    push {r4, lr}
+; LE-NEON-NEXT:    .vsave {d10, d11}
+; LE-NEON-NEXT:    vpush {d10, d11}
+; LE-NEON-NEXT:    .vsave {d8}
+; LE-NEON-NEXT:    vpush {d8}
+; LE-NEON-NEXT:    vmov.f64 d8, d0
+; LE-NEON-NEXT:    vmov.f32 s0, s17
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s16
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    vmov.32 d11[1], r4
+; LE-NEON-NEXT:    vmov.32 d10[1], r1
+; LE-NEON-NEXT:    vorr q0, q5, q5
+; LE-NEON-NEXT:    vpop {d8}
+; LE-NEON-NEXT:    vpop {d10, d11}
+; LE-NEON-NEXT:    pop {r4, pc}
+;
+; BE-LABEL: llrint_v2i64_v2f32:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, lr}
+; BE-NEXT:    push {r4, lr}
+; BE-NEXT:    .vsave {d10, d11}
+; BE-NEXT:    vpush {d10, d11}
+; BE-NEXT:    .vsave {d8}
+; BE-NEXT:    vpush {d8}
+; BE-NEXT:    vrev64.32 d8, d0
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    vmov.32 d11[1], r4
+; BE-NEXT:    vmov.32 d10[1], r1
+; BE-NEXT:    vrev64.32 q0, q5
+; BE-NEXT:    vpop {d8}
+; BE-NEXT:    vpop {d10, d11}
+; BE-NEXT:    pop {r4, pc}
+;
+; BE-NEON-LABEL: llrint_v2i64_v2f32:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, lr}
+; BE-NEON-NEXT:    push {r4, lr}
+; BE-NEON-NEXT:    .vsave {d10, d11}
+; BE-NEON-NEXT:    vpush {d10, d11}
+; BE-NEON-NEXT:    .vsave {d8}
+; BE-NEON-NEXT:    vpush {d8}
+; BE-NEON-NEXT:    vrev64.32 d8, d0
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-NEON-NEXT:    vrev64.32 q0, q5
+; BE-NEON-NEXT:    vpop {d8}
+; BE-NEON-NEXT:    vpop {d10, d11}
+; BE-NEON-NEXT:    pop {r4, pc}
+  %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
+  ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
+
+; Lowering of llvm.llrint on <4 x float>: no vector llrint instruction exists
+; on these ARM targets, so each lane is extracted and passed to the llrintf
+; libcall (result in r0:r1), then the i64 results are reassembled lane-by-lane
+; with vmov.32. Big-endian runs additionally byte-swap with vrev64.32 on the
+; way in/out of the NEON registers. CHECK bodies are auto-generated; do not
+; hand-edit them (regenerate with update_llc_test_checks.py).
+define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
+; LE-LABEL: llrint_v4i64_v4f32:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, lr}
+; LE-NEXT:    push {r4, r5, r6, lr}
+; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; LE-NEXT:    vorr q5, q0, q0
+; LE-NEXT:    vmov.f32 s0, s23
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s20
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s21
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s22
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    vmov.32 d13[1], r6
+; LE-NEXT:    vmov.32 d9[1], r4
+; LE-NEXT:    vmov.32 d12[1], r5
+; LE-NEXT:    vmov.32 d8[1], r1
+; LE-NEXT:    vorr q0, q6, q6
+; LE-NEXT:    vorr q1, q4, q4
+; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; LE-NEXT:    pop {r4, r5, r6, pc}
+;
+; LE-NEON-LABEL: llrint_v4i64_v4f32:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r6, lr}
+; LE-NEON-NEXT:    push {r4, r5, r6, lr}
+; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; LE-NEON-NEXT:    vorr q5, q0, q0
+; LE-NEON-NEXT:    vmov.f32 s0, s23
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s20
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s21
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s22
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    vmov.32 d13[1], r6
+; LE-NEON-NEXT:    vmov.32 d9[1], r4
+; LE-NEON-NEXT:    vmov.32 d12[1], r5
+; LE-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-NEON-NEXT:    vorr q0, q6, q6
+; LE-NEON-NEXT:    vorr q1, q4, q4
+; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; LE-NEON-NEXT:    pop {r4, r5, r6, pc}
+;
+; BE-LABEL: llrint_v4i64_v4f32:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, lr}
+; BE-NEXT:    push {r4, r5, r6, lr}
+; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; BE-NEXT:    vrev64.32 d8, d1
+; BE-NEXT:    vrev64.32 d9, d0
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s18
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s19
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    vmov.32 d13[1], r6
+; BE-NEXT:    vmov.32 d11[1], r4
+; BE-NEXT:    vmov.32 d12[1], r5
+; BE-NEXT:    vmov.32 d10[1], r1
+; BE-NEXT:    vrev64.32 q0, q6
+; BE-NEXT:    vrev64.32 q1, q5
+; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; BE-NEXT:    pop {r4, r5, r6, pc}
+;
+; BE-NEON-LABEL: llrint_v4i64_v4f32:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r6, lr}
+; BE-NEON-NEXT:    push {r4, r5, r6, lr}
+; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; BE-NEON-NEXT:    vrev64.32 d8, d1
+; BE-NEON-NEXT:    vrev64.32 d9, d0
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s18
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s19
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    vmov.32 d13[1], r6
+; BE-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-NEON-NEXT:    vmov.32 d12[1], r5
+; BE-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-NEON-NEXT:    vrev64.32 q0, q6
+; BE-NEON-NEXT:    vrev64.32 q1, q5
+; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; BE-NEON-NEXT:    pop {r4, r5, r6, pc}
+  %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x)
+  ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
+
+; Lowering of llvm.llrint on <8 x float>: eight scalar llrintf libcalls. The
+; <8 x i64> result no longer fits in the q0-q3 return registers alone, so the
+; expansion spills live input halves to the stack between calls (vstmia/vldmia
+; "16-byte Spill"/"Reload" pairs) and keeps per-lane high words in r4-r10.
+; Big-endian again inserts vrev64.32 lane swaps. CHECK bodies are
+; auto-generated; regenerate with update_llc_test_checks.py rather than
+; editing by hand.
+define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
+; LE-LABEL: llrint_v8i64_v8f32:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    .pad #40
+; LE-NEXT:    sub sp, sp, #40
+; LE-NEXT:    vorr q6, q1, q1
+; LE-NEXT:    add lr, sp, #24
+; LE-NEXT:    vorr q7, q0, q0
+; LE-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-NEXT:    vmov.f32 s0, s27
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s24
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s25
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vorr q6, q7, q7
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    mov r10, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    vmov.f32 s0, s26
+; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s27
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s24
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT:    vmov.f32 s0, s1
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #24
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT:    vmov.f32 s0, s2
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    vmov.32 d13[1], r6
+; LE-NEXT:    vmov.32 d15[1], r4
+; LE-NEXT:    vmov.32 d11[1], r10
+; LE-NEXT:    vmov.32 d9[1], r8
+; LE-NEXT:    vmov.32 d12[1], r5
+; LE-NEXT:    vmov.32 d14[1], r7
+; LE-NEXT:    vorr q0, q6, q6
+; LE-NEXT:    vmov.32 d10[1], r9
+; LE-NEXT:    vorr q1, q7, q7
+; LE-NEXT:    vmov.32 d8[1], r1
+; LE-NEXT:    vorr q2, q5, q5
+; LE-NEXT:    vorr q3, q4, q4
+; LE-NEXT:    add sp, sp, #40
+; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-NEON-LABEL: llrint_v8i64_v8f32:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    .pad #40
+; LE-NEON-NEXT:    sub sp, sp, #40
+; LE-NEON-NEXT:    vorr q6, q1, q1
+; LE-NEON-NEXT:    add lr, sp, #24
+; LE-NEON-NEXT:    vorr q7, q0, q0
+; LE-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-NEON-NEXT:    vmov.f32 s0, s27
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s24
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s25
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vorr q6, q7, q7
+; LE-NEON-NEXT:    add lr, sp, #8
+; LE-NEON-NEXT:    mov r10, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    vmov.f32 s0, s26
+; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s27
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s24
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #8
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.f32 s0, s1
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #24
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.f32 s0, s2
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    vmov.32 d13[1], r6
+; LE-NEON-NEXT:    vmov.32 d15[1], r4
+; LE-NEON-NEXT:    vmov.32 d11[1], r10
+; LE-NEON-NEXT:    vmov.32 d9[1], r8
+; LE-NEON-NEXT:    vmov.32 d12[1], r5
+; LE-NEON-NEXT:    vmov.32 d14[1], r7
+; LE-NEON-NEXT:    vorr q0, q6, q6
+; LE-NEON-NEXT:    vmov.32 d10[1], r9
+; LE-NEON-NEXT:    vorr q1, q7, q7
+; LE-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-NEON-NEXT:    vorr q2, q5, q5
+; LE-NEON-NEXT:    vorr q3, q4, q4
+; LE-NEON-NEXT:    add sp, sp, #40
+; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-LABEL: llrint_v8i64_v8f32:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    .pad #32
+; BE-NEXT:    sub sp, sp, #32
+; BE-NEXT:    vorr q4, q1, q1
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    vorr q5, q0, q0
+; BE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-NEXT:    vrev64.32 d12, d8
+; BE-NEXT:    vmov.f32 s0, s25
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s24
+; BE-NEXT:    mov r8, r1
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vrev64.32 d0, d11
+; BE-NEXT:    mov r9, r1
+; BE-NEXT:    vrev64.32 d8, d9
+; BE-NEXT:    vorr d9, d0, d0
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    mov r10, r1
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    vmov.f32 s0, s19
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    vrev64.32 d8, d16
+; BE-NEXT:    vstr d8, [sp, #8] @ 8-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vldr d0, [sp, #8] @ 8-byte Reload
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    vmov.f32 s0, s1
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vldr d0, [sp, #24] @ 8-byte Reload
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    @ kill: def $s0 killed $s0 killed $d0
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    vmov.32 d9[1], r6
+; BE-NEXT:    vmov.32 d11[1], r4
+; BE-NEXT:    vmov.32 d15[1], r8
+; BE-NEXT:    vmov.32 d13[1], r7
+; BE-NEXT:    vmov.32 d8[1], r5
+; BE-NEXT:    vmov.32 d10[1], r10
+; BE-NEXT:    vmov.32 d14[1], r9
+; BE-NEXT:    vmov.32 d12[1], r1
+; BE-NEXT:    vrev64.32 q0, q4
+; BE-NEXT:    vrev64.32 q1, q5
+; BE-NEXT:    vrev64.32 q2, q7
+; BE-NEXT:    vrev64.32 q3, q6
+; BE-NEXT:    add sp, sp, #32
+; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-NEON-LABEL: llrint_v8i64_v8f32:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    .pad #32
+; BE-NEON-NEXT:    sub sp, sp, #32
+; BE-NEON-NEXT:    vorr q4, q1, q1
+; BE-NEON-NEXT:    add lr, sp, #8
+; BE-NEON-NEXT:    vorr q5, q0, q0
+; BE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-NEON-NEXT:    vrev64.32 d12, d8
+; BE-NEON-NEXT:    vmov.f32 s0, s25
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s24
+; BE-NEON-NEXT:    mov r8, r1
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vrev64.32 d0, d11
+; BE-NEON-NEXT:    mov r9, r1
+; BE-NEON-NEXT:    vrev64.32 d8, d9
+; BE-NEON-NEXT:    vorr d9, d0, d0
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    mov r10, r1
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    add lr, sp, #8
+; BE-NEON-NEXT:    vmov.f32 s0, s19
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d8, d16
+; BE-NEON-NEXT:    vstr d8, [sp, #8] @ 8-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vldr d0, [sp, #8] @ 8-byte Reload
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-NEON-NEXT:    vmov.f32 s0, s1
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vldr d0, [sp, #24] @ 8-byte Reload
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    @ kill: def $s0 killed $s0 killed $d0
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    vmov.32 d9[1], r6
+; BE-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-NEON-NEXT:    vmov.32 d15[1], r8
+; BE-NEON-NEXT:    vmov.32 d13[1], r7
+; BE-NEON-NEXT:    vmov.32 d8[1], r5
+; BE-NEON-NEXT:    vmov.32 d10[1], r10
+; BE-NEON-NEXT:    vmov.32 d14[1], r9
+; BE-NEON-NEXT:    vmov.32 d12[1], r1
+; BE-NEON-NEXT:    vrev64.32 q0, q4
+; BE-NEON-NEXT:    vrev64.32 q1, q5
+; BE-NEON-NEXT:    vrev64.32 q2, q7
+; BE-NEON-NEXT:    vrev64.32 q3, q6
+; BE-NEON-NEXT:    add sp, sp, #32
+; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+  %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x)
+  ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
+
+define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
+; LE-LABEL: llrint_v16i64_v16f32:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: .pad #4
+; LE-NEXT: sub sp, sp, #4
+; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: .pad #160
+; LE-NEXT: sub sp, sp, #160
+; LE-NEXT: add lr, sp, #112
+; LE-NEXT: vorr q5, q3, q3
+; LE-NEXT: vorr q6, q0, q0
+; LE-NEXT: mov r4, r0
+; LE-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #48
+; LE-NEXT: vorr q7, q1, q1
+; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEXT: vmov.f32 s0, s23
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s24
+; LE-NEXT: add lr, sp, #144
+; LE-NEXT: vmov.32 d17[0], r0
+; LE-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s25
+; LE-NEXT: str r1, [sp, #84] @ 4-byte Spill
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s28
+; LE-NEXT: add lr, sp, #128
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s29
+; LE-NEXT: mov r9, r1
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s30
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s31
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: add lr, sp, #112
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-NEXT: vmov.f32 s0, s29
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s22
+; LE-NEXT: add lr, sp, #24
+; LE-NEXT: vmov.32 d17[0], r0
+; LE-NEXT: mov r11, r1
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: vmov.32 d13[1], r7
+; LE-NEXT: bl llrintf
+; LE-NEXT: add lr, sp, #144
+; LE-NEXT: vmov.f32 s0, s21
+; LE-NEXT: vmov.32 d12[1], r5
+; LE-NEXT: str r1, [sp, #40] @ 4-byte Spill
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vmov.32 d16[0], r0
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #88
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s20
+; LE-NEXT: mov r10, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: vmov.32 d9[1], r6
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s31
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: add lr, sp, #8
+; LE-NEXT: mov r8, r1
+; LE-NEXT: vmov.32 d8[1], r9
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #64
+; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: add lr, sp, #128
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; LE-NEXT: mov r9, r1
+; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #48
+; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT: vmov.f32 s0, s27
+; LE-NEXT: vmov.32 d11[1], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s26
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: ldr r0, [sp, #84] @ 4-byte Reload
+; LE-NEXT: add lr, sp, #128
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vmov.32 d10[1], r0
+; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: add lr, sp, #144
+; LE-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vmov.32 d17[1], r0
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #112
+; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT: vmov.f32 s0, s20
+; LE-NEXT: bl llrintf
+; LE-NEXT: add lr, sp, #24
+; LE-NEXT: vmov.f32 s0, s22
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vmov.32 d16[0], r0
+; LE-NEXT: vmov.32 d17[1], r11
+; LE-NEXT: vorr q6, q8, q8
+; LE-NEXT: bl llrintf
+; LE-NEXT: add lr, sp, #144
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #8
+; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #128
+; LE-NEXT: vmov.32 d9[1], r9
+; LE-NEXT: vmov.32 d12[1], r6
+; LE-NEXT: vmov.32 d19[1], r10
+; LE-NEXT: vmov.32 d8[1], r1
+; LE-NEXT: vmov.32 d16[1], r0
+; LE-NEXT: add r0, r4, #64
+; LE-NEXT: vmov.32 d18[1], r8
+; LE-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-NEXT: vst1.64 {d8, d9}, [r0:128]!
+; LE-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-NEXT: vmov.32 d15[1], r7
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #64
+; LE-NEXT: vmov.32 d14[1], r5
+; LE-NEXT: vst1.64 {d16, d17}, [r4:128]!
+; LE-NEXT: vst1.64 {d14, d15}, [r4:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #88
+; LE-NEXT: vst1.64 {d16, d17}, [r4:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vst1.64 {d16, d17}, [r4:128]
+; LE-NEXT: add sp, sp, #160
+; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: add sp, sp, #4
+; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v16i64_v16f32:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: .pad #4
+; LE-NEON-NEXT: sub sp, sp, #4
+; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: .pad #160
+; LE-NEON-NEXT: sub sp, sp, #160
+; LE-NEON-NEXT: add lr, sp, #112
+; LE-NEON-NEXT: vorr q5, q3, q3
+; LE-NEON-NEXT: vorr q6, q0, q0
+; LE-NEON-NEXT: mov r4, r0
+; LE-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #48
+; LE-NEON-NEXT: vorr q7, q1, q1
+; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEON-NEXT: vmov.f32 s0, s23
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s24
+; LE-NEON-NEXT: add lr, sp, #144
+; LE-NEON-NEXT: vmov.32 d17[0], r0
+; LE-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s25
+; LE-NEON-NEXT: str r1, [sp, #84] @ 4-byte Spill
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s28
+; LE-NEON-NEXT: add lr, sp, #128
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s29
+; LE-NEON-NEXT: mov r9, r1
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s30
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s31
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: add lr, sp, #112
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.f32 s0, s29
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s22
+; LE-NEON-NEXT: add lr, sp, #24
+; LE-NEON-NEXT: vmov.32 d17[0], r0
+; LE-NEON-NEXT: mov r11, r1
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: vmov.32 d13[1], r7
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: add lr, sp, #144
+; LE-NEON-NEXT: vmov.f32 s0, s21
+; LE-NEON-NEXT: vmov.32 d12[1], r5
+; LE-NEON-NEXT: str r1, [sp, #40] @ 4-byte Spill
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d16[0], r0
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #88
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s20
+; LE-NEON-NEXT: mov r10, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: vmov.32 d9[1], r6
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s31
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: add lr, sp, #8
+; LE-NEON-NEXT: mov r8, r1
+; LE-NEON-NEXT: vmov.32 d8[1], r9
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #64
+; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: add lr, sp, #128
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; LE-NEON-NEXT: mov r9, r1
+; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #48
+; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.f32 s0, s27
+; LE-NEON-NEXT: vmov.32 d11[1], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s26
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #84] @ 4-byte Reload
+; LE-NEON-NEXT: add lr, sp, #128
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vmov.32 d10[1], r0
+; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: add lr, sp, #144
+; LE-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d17[1], r0
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #112
+; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.f32 s0, s20
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: add lr, sp, #24
+; LE-NEON-NEXT: vmov.f32 s0, s22
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d16[0], r0
+; LE-NEON-NEXT: vmov.32 d17[1], r11
+; LE-NEON-NEXT: vorr q6, q8, q8
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: add lr, sp, #144
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #8
+; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #128
+; LE-NEON-NEXT: vmov.32 d9[1], r9
+; LE-NEON-NEXT: vmov.32 d12[1], r6
+; LE-NEON-NEXT: vmov.32 d19[1], r10
+; LE-NEON-NEXT: vmov.32 d8[1], r1
+; LE-NEON-NEXT: vmov.32 d16[1], r0
+; LE-NEON-NEXT: add r0, r4, #64
+; LE-NEON-NEXT: vmov.32 d18[1], r8
+; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]!
+; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-NEON-NEXT: vmov.32 d15[1], r7
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #64
+; LE-NEON-NEXT: vmov.32 d14[1], r5
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]!
+; LE-NEON-NEXT: vst1.64 {d14, d15}, [r4:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #88
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]
+; LE-NEON-NEXT: add sp, sp, #160
+; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: add sp, sp, #4
+; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v16i64_v16f32:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: .pad #4
+; BE-NEXT: sub sp, sp, #4
+; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: .pad #144
+; BE-NEXT: sub sp, sp, #144
+; BE-NEXT: vorr q6, q3, q3
+; BE-NEXT: add lr, sp, #112
+; BE-NEXT: vorr q7, q0, q0
+; BE-NEXT: mov r4, r0
+; BE-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #96
+; BE-NEXT: vrev64.32 d8, d13
+; BE-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: str r1, [sp, #88] @ 4-byte Spill
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vrev64.32 d8, d14
+; BE-NEXT: add lr, sp, #128
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: str r1, [sp, #92] @ 4-byte Spill
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: vrev64.32 d9, d12
+; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT: vstr d9, [sp, #64] @ 8-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s19
+; BE-NEXT: mov r9, r1
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: str r1, [sp, #84] @ 4-byte Spill
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: vrev64.32 d9, d15
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s18
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s19
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vldr d0, [sp, #64] @ 8-byte Reload
+; BE-NEXT: mov r7, r1
+; BE-NEXT: @ kill: def $s0 killed $s0 killed $d0
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: add lr, sp, #40
+; BE-NEXT: str r1, [sp, #60] @ 4-byte Spill
+; BE-NEXT: vmov.32 d15[1], r7
+; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #96
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: vrev64.32 d8, d16
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: vmov.32 d14[1], r5
+; BE-NEXT: add lr, sp, #64
+; BE-NEXT: mov r10, r1
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: add lr, sp, #24
+; BE-NEXT: mov r11, r1
+; BE-NEXT: vmov.32 d13[1], r6
+; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #96
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: vrev64.32 d8, d17
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: vmov.32 d12[1], r9
+; BE-NEXT: add lr, sp, #96
+; BE-NEXT: mov r8, r1
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: add lr, sp, #8
+; BE-NEXT: ldr r0, [sp, #88] @ 4-byte Reload
+; BE-NEXT: mov r9, r1
+; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #112
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #128
+; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT: vrev64.32 d8, d16
+; BE-NEXT: vmov.32 d11[1], r0
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: ldr r0, [sp, #92] @ 4-byte Reload
+; BE-NEXT: add lr, sp, #128
+; BE-NEXT: mov r7, r1
+; BE-NEXT: vmov.32 d10[1], r0
+; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: add lr, sp, #112
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: ldr r0, [sp, #84] @ 4-byte Reload
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #40
+; BE-NEXT: vrev64.32 d8, d17
+; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: vmov.32 d13[1], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: ldr r0, [sp, #60] @ 4-byte Reload
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vmov.32 d12[1], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: add lr, sp, #24
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: add r0, r4, #64
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #8
+; BE-NEXT: vmov.32 d17[1], r10
+; BE-NEXT: vmov.32 d16[1], r11
+; BE-NEXT: vorr q12, q8, q8
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #128
+; BE-NEXT: vmov.32 d15[1], r7
+; BE-NEXT: vmov.32 d11[1], r6
+; BE-NEXT: vmov.32 d14[1], r5
+; BE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #96
+; BE-NEXT: vmov.32 d10[1], r1
+; BE-NEXT: vmov.32 d17[1], r8
+; BE-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #64
+; BE-NEXT: vmov.32 d16[1], r9
+; BE-NEXT: vrev64.32 q14, q7
+; BE-NEXT: vorr q13, q8, q8
+; BE-NEXT: vrev64.32 q15, q5
+; BE-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-NEXT: vrev64.32 q8, q6
+; BE-NEXT: vst1.64 {d28, d29}, [r0:128]!
+; BE-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-NEXT: vrev64.32 q9, q9
+; BE-NEXT: vrev64.32 q10, q10
+; BE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT: vrev64.32 q11, q11
+; BE-NEXT: vrev64.32 q12, q12
+; BE-NEXT: vst1.64 {d18, d19}, [r0:128]
+; BE-NEXT: vst1.64 {d20, d21}, [r4:128]!
+; BE-NEXT: vst1.64 {d22, d23}, [r4:128]!
+; BE-NEXT: vrev64.32 q13, q13
+; BE-NEXT: vst1.64 {d24, d25}, [r4:128]!
+; BE-NEXT: vst1.64 {d26, d27}, [r4:128]
+; BE-NEXT: add sp, sp, #144
+; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: add sp, sp, #4
+; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v16i64_v16f32:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: .pad #4
+; BE-NEON-NEXT: sub sp, sp, #4
+; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: .pad #144
+; BE-NEON-NEXT: sub sp, sp, #144
+; BE-NEON-NEXT: vorr q6, q3, q3
+; BE-NEON-NEXT: add lr, sp, #112
+; BE-NEON-NEXT: vorr q7, q0, q0
+; BE-NEON-NEXT: mov r4, r0
+; BE-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #96
+; BE-NEON-NEXT: vrev64.32 d8, d13
+; BE-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: str r1, [sp, #88] @ 4-byte Spill
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vrev64.32 d8, d14
+; BE-NEON-NEXT: add lr, sp, #128
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: str r1, [sp, #92] @ 4-byte Spill
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: vrev64.32 d9, d12
+; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT: vstr d9, [sp, #64] @ 8-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s19
+; BE-NEON-NEXT: mov r9, r1
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: str r1, [sp, #84] @ 4-byte Spill
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: vrev64.32 d9, d15
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s18
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s19
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vldr d0, [sp, #64] @ 8-byte Reload
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: @ kill: def $s0 killed $s0 killed $d0
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: add lr, sp, #40
+; BE-NEON-NEXT: str r1, [sp, #60] @ 4-byte Spill
+; BE-NEON-NEXT: vmov.32 d15[1], r7
+; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #96
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: vrev64.32 d8, d16
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: vmov.32 d14[1], r5
+; BE-NEON-NEXT: add lr, sp, #64
+; BE-NEON-NEXT: mov r10, r1
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: add lr, sp, #24
+; BE-NEON-NEXT: mov r11, r1
+; BE-NEON-NEXT: vmov.32 d13[1], r6
+; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #96
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: vrev64.32 d8, d17
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: vmov.32 d12[1], r9
+; BE-NEON-NEXT: add lr, sp, #96
+; BE-NEON-NEXT: mov r8, r1
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: add lr, sp, #8
+; BE-NEON-NEXT: ldr r0, [sp, #88] @ 4-byte Reload
+; BE-NEON-NEXT: mov r9, r1
+; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #112
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #128
+; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT: vrev64.32 d8, d16
+; BE-NEON-NEXT: vmov.32 d11[1], r0
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #92] @ 4-byte Reload
+; BE-NEON-NEXT: add lr, sp, #128
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: vmov.32 d10[1], r0
+; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: add lr, sp, #112
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #84] @ 4-byte Reload
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #40
+; BE-NEON-NEXT: vrev64.32 d8, d17
+; BE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: vmov.32 d13[1], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #60] @ 4-byte Reload
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vmov.32 d12[1], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: add lr, sp, #24
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: add r0, r4, #64
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #8
+; BE-NEON-NEXT: vmov.32 d17[1], r10
+; BE-NEON-NEXT: vmov.32 d16[1], r11
+; BE-NEON-NEXT: vorr q12, q8, q8
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #128
+; BE-NEON-NEXT: vmov.32 d15[1], r7
+; BE-NEON-NEXT: vmov.32 d11[1], r6
+; BE-NEON-NEXT: vmov.32 d14[1], r5
+; BE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #96
+; BE-NEON-NEXT: vmov.32 d10[1], r1
+; BE-NEON-NEXT: vmov.32 d17[1], r8
+; BE-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #64
+; BE-NEON-NEXT: vmov.32 d16[1], r9
+; BE-NEON-NEXT: vrev64.32 q14, q7
+; BE-NEON-NEXT: vorr q13, q8, q8
+; BE-NEON-NEXT: vrev64.32 q15, q5
+; BE-NEON-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-NEON-NEXT: vrev64.32 q8, q6
+; BE-NEON-NEXT: vst1.64 {d28, d29}, [r0:128]!
+; BE-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-NEON-NEXT: vrev64.32 q9, q9
+; BE-NEON-NEXT: vrev64.32 q10, q10
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT: vrev64.32 q11, q11
+; BE-NEON-NEXT: vrev64.32 q12, q12
+; BE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]
+; BE-NEON-NEXT: vst1.64 {d20, d21}, [r4:128]!
+; BE-NEON-NEXT: vst1.64 {d22, d23}, [r4:128]!
+; BE-NEON-NEXT: vrev64.32 q13, q13
+; BE-NEON-NEXT: vst1.64 {d24, d25}, [r4:128]!
+; BE-NEON-NEXT: vst1.64 {d26, d27}, [r4:128]
+; BE-NEON-NEXT: add sp, sp, #144
+; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: add sp, sp, #4
+; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x)
+ ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
+
+define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) {
+; LE-LABEL: llrint_v32i64_v32f32:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: .pad #4
+; LE-NEXT: sub sp, sp, #4
+; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: .pad #184
+; LE-NEXT: sub sp, sp, #184
+; LE-NEXT: add lr, sp, #152
+; LE-NEXT: vorr q7, q3, q3
+; LE-NEXT: vorr q4, q2, q2
+; LE-NEXT: mov r5, r0
+; LE-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #88
+; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEXT: vmov.f32 s0, s3
+; LE-NEXT: str r0, [sp, #68] @ 4-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s18
+; LE-NEXT: add lr, sp, #168
+; LE-NEXT: vmov.32 d17[0], r0
+; LE-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s16
+; LE-NEXT: mov r8, r1
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s17
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s19
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s31
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s30
+; LE-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: vmov.32 d11[1], r7
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s29
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: add lr, sp, #104
+; LE-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; LE-NEXT: vmov.32 d13[1], r4
+; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: add r0, sp, #320
+; LE-NEXT: add lr, sp, #120
+; LE-NEXT: mov r11, r1
+; LE-NEXT: vld1.64 {d0, d1}, [r0]
+; LE-NEXT: add r0, sp, #304
+; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #72
+; LE-NEXT: vld1.64 {d0, d1}, [r0]
+; LE-NEXT: add r0, sp, #336
+; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #32
+; LE-NEXT: vld1.64 {d0, d1}, [r0]
+; LE-NEXT: add r0, sp, #288
+; LE-NEXT: vmov.32 d12[1], r6
+; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #48
+; LE-NEXT: vld1.64 {d0, d1}, [r0]
+; LE-NEXT: vmov.32 d10[1], r8
+; LE-NEXT: add r8, r5, #64
+; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #152
+; LE-NEXT: vst1.64 {d12, d13}, [r8:128]!
+; LE-NEXT: vst1.64 {d10, d11}, [r8:128]!
+; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT: vmov.f32 s0, s27
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s28
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s26
+; LE-NEXT: mov r9, r1
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: vmov.32 d11[1], r4
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: add lr, sp, #136
+; LE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
+; LE-NEXT: mov r10, r1
+; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #168
+; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #88
+; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT: vmov.f32 s0, s26
+; LE-NEXT: vmov.32 d11[1], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s25
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: add lr, sp, #168
+; LE-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #104
+; LE-NEXT: vorr q5, q6, q6
+; LE-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-NEXT: vmov.32 d15[1], r0
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s20
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; LE-NEXT: add lr, sp, #104
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d14[1], r0
+; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: add lr, sp, #152
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vorr q7, q6, q6
+; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT: vmov.32 d9[1], r11
+; LE-NEXT: vmov.f32 s0, s25
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s24
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: vmov.32 d8[1], r9
+; LE-NEXT: bl llrintf
+; LE-NEXT: add lr, sp, #136
+; LE-NEXT: mov r11, r1
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vmov.32 d16[1], r10
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #120
+; LE-NEXT: vst1.64 {d8, d9}, [r8:128]!
+; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT: vmov.f32 s0, s1
+; LE-NEXT: bl llrintf
+; LE-NEXT: add lr, sp, #152
+; LE-NEXT: vmov.32 d17[0], r0
+; LE-NEXT: mov r10, r1
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #104
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #72
+; LE-NEXT: vst1.64 {d16, d17}, [r8:128]
+; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT: vmov.f32 s0, s19
+; LE-NEXT: bl llrintf
+; LE-NEXT: add lr, sp, #168
+; LE-NEXT: vmov.f32 s0, s18
+; LE-NEXT: mov r8, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vmov.32 d16[1], r7
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s17
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: add lr, sp, #104
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vmov.32 d15[1], r4
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s16
+; LE-NEXT: mov r9, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: vmov.32 d14[1], r6
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: add lr, sp, #88
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d11[1], r5
+; LE-NEXT: vmov.32 d10[1], r11
+; LE-NEXT: ldr r11, [sp, #68] @ 4-byte Reload
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #16
+; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #32
+; LE-NEXT: vst1.64 {d14, d15}, [r11:128]!
+; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT: vmov.f32 s0, s23
+; LE-NEXT: bl llrintf
+; LE-NEXT: add lr, sp, #152
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #120
+; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT: @ kill: def $s0 killed $s0 killed $q0
+; LE-NEXT: vmov.32 d13[1], r10
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s22
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: add lr, sp, #152
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #104
+; LE-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-NEXT: vmov.32 d15[1], r8
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s21
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: add lr, sp, #72
+; LE-NEXT: mov r8, r1
+; LE-NEXT: vmov.32 d14[1], r7
+; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #104
+; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s20
+; LE-NEXT: add lr, sp, #88
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT: vmov.32 d13[1], r9
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: add lr, sp, #32
+; LE-NEXT: mov r9, r1
+; LE-NEXT: vmov.32 d12[1], r6
+; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #88
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #120
+; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT: vmov.f32 s0, s19
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s18
+; LE-NEXT: add lr, sp, #72
+; LE-NEXT: mov r10, r1
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT: vmov.32 d13[1], r4
+; LE-NEXT: bl llrintf
+; LE-NEXT: add lr, sp, #152
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vmov.32 d16[1], r5
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #168
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #48
+; LE-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT: vmov.f32 s0, s21
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s20
+; LE-NEXT: vmov.32 d12[1], r8
+; LE-NEXT: add lr, sp, #72
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: bl llrintf
+; LE-NEXT: vmov.f32 s0, s23
+; LE-NEXT: add lr, sp, #32
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT: vmov.32 d13[1], r7
+; LE-NEXT: bl llrintf
+; LE-NEXT: add lr, sp, #48
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT: vmov.f32 s0, s2
+; LE-NEXT: vmov.32 d12[1], r9
+; LE-NEXT: bl llrintf
+; LE-NEXT: add lr, sp, #16
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #136
+; LE-NEXT: vmov.32 d11[1], r7
+; LE-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #152
+; LE-NEXT: vmov.32 d15[1], r10
+; LE-NEXT: vst1.64 {d16, d17}, [r11:128]
+; LE-NEXT: vmov.32 d10[1], r1
+; LE-NEXT: ldr r1, [sp, #68] @ 4-byte Reload
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add r0, r1, #192
+; LE-NEXT: add lr, sp, #72
+; LE-NEXT: vmov.32 d14[1], r4
+; LE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-NEXT: vst1.64 {d14, d15}, [r0:128]!
+; LE-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-NEXT: vmov.32 d9[1], r5
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #88
+; LE-NEXT: vmov.32 d8[1], r6
+; LE-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-NEXT: add r0, r1, #128
+; LE-NEXT: vst1.64 {d8, d9}, [r0:128]!
+; LE-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #104
+; LE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-NEXT: add sp, sp, #184
+; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: add sp, sp, #4
+; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v32i64_v32f32:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: .pad #4
+; LE-NEON-NEXT: sub sp, sp, #4
+; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: .pad #184
+; LE-NEON-NEXT: sub sp, sp, #184
+; LE-NEON-NEXT: add lr, sp, #152
+; LE-NEON-NEXT: vorr q7, q3, q3
+; LE-NEON-NEXT: vorr q4, q2, q2
+; LE-NEON-NEXT: mov r5, r0
+; LE-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #88
+; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEON-NEXT: vmov.f32 s0, s3
+; LE-NEON-NEXT: str r0, [sp, #68] @ 4-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s18
+; LE-NEON-NEXT: add lr, sp, #168
+; LE-NEON-NEXT: vmov.32 d17[0], r0
+; LE-NEON-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s16
+; LE-NEON-NEXT: mov r8, r1
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s17
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s19
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s31
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s30
+; LE-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: vmov.32 d11[1], r7
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s29
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: add lr, sp, #104
+; LE-NEON-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; LE-NEON-NEXT: vmov.32 d13[1], r4
+; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: add r0, sp, #320
+; LE-NEON-NEXT: add lr, sp, #120
+; LE-NEON-NEXT: mov r11, r1
+; LE-NEON-NEXT: vld1.64 {d0, d1}, [r0]
+; LE-NEON-NEXT: add r0, sp, #304
+; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #72
+; LE-NEON-NEXT: vld1.64 {d0, d1}, [r0]
+; LE-NEON-NEXT: add r0, sp, #336
+; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #32
+; LE-NEON-NEXT: vld1.64 {d0, d1}, [r0]
+; LE-NEON-NEXT: add r0, sp, #288
+; LE-NEON-NEXT: vmov.32 d12[1], r6
+; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #48
+; LE-NEON-NEXT: vld1.64 {d0, d1}, [r0]
+; LE-NEON-NEXT: vmov.32 d10[1], r8
+; LE-NEON-NEXT: add r8, r5, #64
+; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #152
+; LE-NEON-NEXT: vst1.64 {d12, d13}, [r8:128]!
+; LE-NEON-NEXT: vst1.64 {d10, d11}, [r8:128]!
+; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.f32 s0, s27
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s28
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s26
+; LE-NEON-NEXT: mov r9, r1
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: vmov.32 d11[1], r4
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: add lr, sp, #136
+; LE-NEON-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
+; LE-NEON-NEXT: mov r10, r1
+; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #168
+; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #88
+; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.f32 s0, s26
+; LE-NEON-NEXT: vmov.32 d11[1], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s25
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: add lr, sp, #168
+; LE-NEON-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #104
+; LE-NEON-NEXT: vorr q5, q6, q6
+; LE-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d15[1], r0
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s20
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; LE-NEON-NEXT: add lr, sp, #104
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d14[1], r0
+; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: add lr, sp, #152
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vorr q7, q6, q6
+; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d9[1], r11
+; LE-NEON-NEXT: vmov.f32 s0, s25
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s24
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: vmov.32 d8[1], r9
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: add lr, sp, #136
+; LE-NEON-NEXT: mov r11, r1
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d16[1], r10
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #120
+; LE-NEON-NEXT: vst1.64 {d8, d9}, [r8:128]!
+; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.f32 s0, s1
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: add lr, sp, #152
+; LE-NEON-NEXT: vmov.32 d17[0], r0
+; LE-NEON-NEXT: mov r10, r1
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #104
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #72
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]
+; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.f32 s0, s19
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: add lr, sp, #168
+; LE-NEON-NEXT: vmov.f32 s0, s18
+; LE-NEON-NEXT: mov r8, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d16[1], r7
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s17
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: add lr, sp, #104
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vmov.32 d15[1], r4
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s16
+; LE-NEON-NEXT: mov r9, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: vmov.32 d14[1], r6
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: add lr, sp, #88
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d11[1], r5
+; LE-NEON-NEXT: vmov.32 d10[1], r11
+; LE-NEON-NEXT: ldr r11, [sp, #68] @ 4-byte Reload
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #16
+; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #32
+; LE-NEON-NEXT: vst1.64 {d14, d15}, [r11:128]!
+; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.f32 s0, s23
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: add lr, sp, #152
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #120
+; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT: @ kill: def $s0 killed $s0 killed $q0
+; LE-NEON-NEXT: vmov.32 d13[1], r10
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s22
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: add lr, sp, #152
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #104
+; LE-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d15[1], r8
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s21
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: add lr, sp, #72
+; LE-NEON-NEXT: mov r8, r1
+; LE-NEON-NEXT: vmov.32 d14[1], r7
+; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #104
+; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s20
+; LE-NEON-NEXT: add lr, sp, #88
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d13[1], r9
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: add lr, sp, #32
+; LE-NEON-NEXT: mov r9, r1
+; LE-NEON-NEXT: vmov.32 d12[1], r6
+; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #88
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #120
+; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.f32 s0, s19
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s18
+; LE-NEON-NEXT: add lr, sp, #72
+; LE-NEON-NEXT: mov r10, r1
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d13[1], r4
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: add lr, sp, #152
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d16[1], r5
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #168
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #48
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.f32 s0, s21
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s20
+; LE-NEON-NEXT: vmov.32 d12[1], r8
+; LE-NEON-NEXT: add lr, sp, #72
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: vmov.f32 s0, s23
+; LE-NEON-NEXT: add lr, sp, #32
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d13[1], r7
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: add lr, sp, #48
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.f32 s0, s2
+; LE-NEON-NEXT: vmov.32 d12[1], r9
+; LE-NEON-NEXT: bl llrintf
+; LE-NEON-NEXT: add lr, sp, #16
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #136
+; LE-NEON-NEXT: vmov.32 d11[1], r7
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #152
+; LE-NEON-NEXT: vmov.32 d15[1], r10
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]
+; LE-NEON-NEXT: vmov.32 d10[1], r1
+; LE-NEON-NEXT: ldr r1, [sp, #68] @ 4-byte Reload
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add r0, r1, #192
+; LE-NEON-NEXT: add lr, sp, #72
+; LE-NEON-NEXT: vmov.32 d14[1], r4
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]!
+; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-NEON-NEXT: vmov.32 d9[1], r5
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #88
+; LE-NEON-NEXT: vmov.32 d8[1], r6
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-NEON-NEXT: add r0, r1, #128
+; LE-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]!
+; LE-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #104
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-NEON-NEXT: add sp, sp, #184
+; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: add sp, sp, #4
+; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v32i64_v32f32:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: .pad #4
+; BE-NEXT: sub sp, sp, #4
+; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: .pad #256
+; BE-NEXT: sub sp, sp, #256
+; BE-NEXT: add lr, sp, #208
+; BE-NEXT: str r0, [sp, #156] @ 4-byte Spill
+; BE-NEXT: add r0, sp, #408
+; BE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #120
+; BE-NEXT: vld1.64 {d10, d11}, [r0]
+; BE-NEXT: add r0, sp, #392
+; BE-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #160
+; BE-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #176
+; BE-NEXT: vrev64.32 d8, d10
+; BE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #136
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: vld1.64 {d12, d13}, [r0]
+; BE-NEXT: add r0, sp, #360
+; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #192
+; BE-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEXT: add r0, sp, #376
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #40
+; BE-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: str r1, [sp, #88] @ 4-byte Spill
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vrev64.32 d9, d11
+; BE-NEXT: add lr, sp, #240
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: str r1, [sp, #104] @ 4-byte Spill
+; BE-NEXT: vmov.f32 s0, s18
+; BE-NEXT: vrev64.32 d8, d13
+; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s19
+; BE-NEXT: add lr, sp, #192
+; BE-NEXT: str r1, [sp, #72] @ 4-byte Spill
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: vrev64.32 d10, d16
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s20
+; BE-NEXT: add lr, sp, #224
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s21
+; BE-NEXT: mov r7, r1
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: add lr, sp, #24
+; BE-NEXT: mov r9, r1
+; BE-NEXT: vmov.32 d15[1], r6
+; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #192
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: vrev64.32 d8, d17
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: vmov.32 d14[1], r7
+; BE-NEXT: add lr, sp, #56
+; BE-NEXT: mov r10, r1
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: add lr, sp, #192
+; BE-NEXT: mov r11, r1
+; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #40
+; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #224
+; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT: vrev64.32 d8, d12
+; BE-NEXT: vmov.32 d11[1], r4
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: vmov.32 d10[1], r5
+; BE-NEXT: add lr, sp, #224
+; BE-NEXT: mov r8, r1
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: vrev64.32 d8, d13
+; BE-NEXT: add lr, sp, #8
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: ldr r0, [sp, #88] @ 4-byte Reload
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #240
+; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT: vmov.32 d11[1], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: ldr r0, [sp, #104] @ 4-byte Reload
+; BE-NEXT: add lr, sp, #240
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d10[1], r0
+; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: add lr, sp, #136
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: ldr r0, [sp, #72] @ 4-byte Reload
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #24
+; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEXT: vrev64.32 d8, d16
+; BE-NEXT: vmov.32 d13[1], r0
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: mov r7, r1
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: vmov.32 d12[1], r9
+; BE-NEXT: bl llrintf
+; BE-NEXT: add lr, sp, #192
+; BE-NEXT: vmov.32 d15[1], r4
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #8
+; BE-NEXT: vmov.32 d17[1], r10
+; BE-NEXT: vmov.32 d16[1], r11
+; BE-NEXT: vorr q9, q8, q8
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #192
+; BE-NEXT: vmov.32 d17[1], r8
+; BE-NEXT: vmov.32 d16[1], r5
+; BE-NEXT: vorr q10, q8, q8
+; BE-NEXT: vrev64.32 q8, q6
+; BE-NEXT: vmov.32 d14[1], r6
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #240
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: vrev64.32 q8, q8
+; BE-NEXT: vmov.32 d11[1], r7
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #224
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: vmov.32 d10[1], r1
+; BE-NEXT: vrev64.32 q8, q8
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #56
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #136
+; BE-NEXT: vrev64.32 q8, q8
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #104
+; BE-NEXT: vrev64.32 q8, q9
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #88
+; BE-NEXT: vrev64.32 q8, q10
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #72
+; BE-NEXT: vrev64.32 q8, q7
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #208
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #56
+; BE-NEXT: vrev64.32 d8, d17
+; BE-NEXT: vrev64.32 q8, q5
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: add lr, sp, #120
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT: vmov.32 d13[1], r4
+; BE-NEXT: vrev64.32 d8, d10
+; BE-NEXT: vmov.32 d12[1], r1
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: vrev64.32 q6, q6
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: vmov.32 d15[1], r1
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: ldr r6, [sp, #156] @ 4-byte Reload
+; BE-NEXT: vrev64.32 d8, d11
+; BE-NEXT: add r5, r6, #64
+; BE-NEXT: vmov.32 d14[1], r1
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: vrev64.32 q8, q7
+; BE-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: vmov.32 d15[1], r1
+; BE-NEXT: bl llrintf
+; BE-NEXT: add lr, sp, #208
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-NEXT: vmov.32 d14[1], r1
+; BE-NEXT: vrev64.32 d8, d18
+; BE-NEXT: vrev64.32 q8, q7
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: add lr, sp, #160
+; BE-NEXT: vmov.32 d15[1], r4
+; BE-NEXT: vmov.32 d14[1], r1
+; BE-NEXT: vrev64.32 q8, q7
+; BE-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT: vrev64.32 d8, d11
+; BE-NEXT: vst1.64 {d12, d13}, [r5:128]
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: add lr, sp, #208
+; BE-NEXT: vmov.32 d13[1], r4
+; BE-NEXT: vmov.32 d12[1], r1
+; BE-NEXT: vrev64.32 q8, q6
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #176
+; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEXT: vrev64.32 d8, d12
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: vmov.32 d15[1], r1
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: mov r5, r6
+; BE-NEXT: vrev64.32 d8, d13
+; BE-NEXT: vmov.32 d14[1], r1
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: vrev64.32 q8, q7
+; BE-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: vmov.32 d15[1], r1
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: vrev64.32 d8, d10
+; BE-NEXT: vmov.32 d14[1], r1
+; BE-NEXT: vmov.f32 s0, s17
+; BE-NEXT: vrev64.32 q8, q7
+; BE-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.f32 s0, s16
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: bl llrintf
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: add lr, sp, #208
+; BE-NEXT: add r0, r6, #192
+; BE-NEXT: vmov.32 d15[1], r4
+; BE-NEXT: vmov.32 d14[1], r1
+; BE-NEXT: vrev64.32 q8, q7
+; BE-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #56
+; BE-NEXT: vst1.64 {d16, d17}, [r5:128]
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #192
+; BE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #240
+; BE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #224
+; BE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #136
+; BE-NEXT: vst1.64 {d16, d17}, [r0:128]
+; BE-NEXT: add r0, r6, #128
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #104
+; BE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #88
+; BE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #72
+; BE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: vst1.64 {d16, d17}, [r0:128]
+; BE-NEXT: add sp, sp, #256
+; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: add sp, sp, #4
+; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v32i64_v32f32:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: .pad #4
+; BE-NEON-NEXT: sub sp, sp, #4
+; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: .pad #256
+; BE-NEON-NEXT: sub sp, sp, #256
+; BE-NEON-NEXT: add lr, sp, #208
+; BE-NEON-NEXT: str r0, [sp, #156] @ 4-byte Spill
+; BE-NEON-NEXT: add r0, sp, #408
+; BE-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #120
+; BE-NEON-NEXT: vld1.64 {d10, d11}, [r0]
+; BE-NEON-NEXT: add r0, sp, #392
+; BE-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #160
+; BE-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #176
+; BE-NEON-NEXT: vrev64.32 d8, d10
+; BE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #136
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: vld1.64 {d12, d13}, [r0]
+; BE-NEON-NEXT: add r0, sp, #360
+; BE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #192
+; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT: add r0, sp, #376
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #40
+; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: str r1, [sp, #88] @ 4-byte Spill
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vrev64.32 d9, d11
+; BE-NEON-NEXT: add lr, sp, #240
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: str r1, [sp, #104] @ 4-byte Spill
+; BE-NEON-NEXT: vmov.f32 s0, s18
+; BE-NEON-NEXT: vrev64.32 d8, d13
+; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s19
+; BE-NEON-NEXT: add lr, sp, #192
+; BE-NEON-NEXT: str r1, [sp, #72] @ 4-byte Spill
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: vrev64.32 d10, d16
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s20
+; BE-NEON-NEXT: add lr, sp, #224
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s21
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: add lr, sp, #24
+; BE-NEON-NEXT: mov r9, r1
+; BE-NEON-NEXT: vmov.32 d15[1], r6
+; BE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #192
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: vrev64.32 d8, d17
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: vmov.32 d14[1], r7
+; BE-NEON-NEXT: add lr, sp, #56
+; BE-NEON-NEXT: mov r10, r1
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: add lr, sp, #192
+; BE-NEON-NEXT: mov r11, r1
+; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #40
+; BE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #224
+; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT: vrev64.32 d8, d12
+; BE-NEON-NEXT: vmov.32 d11[1], r4
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: vmov.32 d10[1], r5
+; BE-NEON-NEXT: add lr, sp, #224
+; BE-NEON-NEXT: mov r8, r1
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vrev64.32 d8, d13
+; BE-NEON-NEXT: add lr, sp, #8
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #88] @ 4-byte Reload
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #240
+; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT: vmov.32 d11[1], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #104] @ 4-byte Reload
+; BE-NEON-NEXT: add lr, sp, #240
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d10[1], r0
+; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: add lr, sp, #136
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #72] @ 4-byte Reload
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #24
+; BE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEON-NEXT: vrev64.32 d8, d16
+; BE-NEON-NEXT: vmov.32 d13[1], r0
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: vmov.32 d12[1], r9
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: add lr, sp, #192
+; BE-NEON-NEXT: vmov.32 d15[1], r4
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #8
+; BE-NEON-NEXT: vmov.32 d17[1], r10
+; BE-NEON-NEXT: vmov.32 d16[1], r11
+; BE-NEON-NEXT: vorr q9, q8, q8
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #192
+; BE-NEON-NEXT: vmov.32 d17[1], r8
+; BE-NEON-NEXT: vmov.32 d16[1], r5
+; BE-NEON-NEXT: vorr q10, q8, q8
+; BE-NEON-NEXT: vrev64.32 q8, q6
+; BE-NEON-NEXT: vmov.32 d14[1], r6
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #240
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: vrev64.32 q8, q8
+; BE-NEON-NEXT: vmov.32 d11[1], r7
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #224
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: vmov.32 d10[1], r1
+; BE-NEON-NEXT: vrev64.32 q8, q8
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #56
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #136
+; BE-NEON-NEXT: vrev64.32 q8, q8
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #104
+; BE-NEON-NEXT: vrev64.32 q8, q9
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #88
+; BE-NEON-NEXT: vrev64.32 q8, q10
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #72
+; BE-NEON-NEXT: vrev64.32 q8, q7
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #208
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #56
+; BE-NEON-NEXT: vrev64.32 d8, d17
+; BE-NEON-NEXT: vrev64.32 q8, q5
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: add lr, sp, #120
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT: vmov.32 d13[1], r4
+; BE-NEON-NEXT: vrev64.32 d8, d10
+; BE-NEON-NEXT: vmov.32 d12[1], r1
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: vrev64.32 q6, q6
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: vmov.32 d15[1], r1
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: ldr r6, [sp, #156] @ 4-byte Reload
+; BE-NEON-NEXT: vrev64.32 d8, d11
+; BE-NEON-NEXT: add r5, r6, #64
+; BE-NEON-NEXT: vmov.32 d14[1], r1
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: vrev64.32 q8, q7
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: vmov.32 d15[1], r1
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: add lr, sp, #208
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-NEON-NEXT: vmov.32 d14[1], r1
+; BE-NEON-NEXT: vrev64.32 d8, d18
+; BE-NEON-NEXT: vrev64.32 q8, q7
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: add lr, sp, #160
+; BE-NEON-NEXT: vmov.32 d15[1], r4
+; BE-NEON-NEXT: vmov.32 d14[1], r1
+; BE-NEON-NEXT: vrev64.32 q8, q7
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT: vrev64.32 d8, d11
+; BE-NEON-NEXT: vst1.64 {d12, d13}, [r5:128]
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: add lr, sp, #208
+; BE-NEON-NEXT: vmov.32 d13[1], r4
+; BE-NEON-NEXT: vmov.32 d12[1], r1
+; BE-NEON-NEXT: vrev64.32 q8, q6
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #176
+; BE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEON-NEXT: vrev64.32 d8, d12
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: vmov.32 d15[1], r1
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: mov r5, r6
+; BE-NEON-NEXT: vrev64.32 d8, d13
+; BE-NEON-NEXT: vmov.32 d14[1], r1
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: vrev64.32 q8, q7
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: vmov.32 d15[1], r1
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: vrev64.32 d8, d10
+; BE-NEON-NEXT: vmov.32 d14[1], r1
+; BE-NEON-NEXT: vmov.f32 s0, s17
+; BE-NEON-NEXT: vrev64.32 q8, q7
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.f32 s0, s16
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: bl llrintf
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: add lr, sp, #208
+; BE-NEON-NEXT: add r0, r6, #192
+; BE-NEON-NEXT: vmov.32 d15[1], r4
+; BE-NEON-NEXT: vmov.32 d14[1], r1
+; BE-NEON-NEXT: vrev64.32 q8, q7
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #56
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #192
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #240
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #224
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #136
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; BE-NEON-NEXT: add r0, r6, #128
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #104
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #88
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #72
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; BE-NEON-NEXT: add sp, sp, #256
+; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: add sp, sp, #4
+; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float> %x)
+ ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float>)
+
+define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { ; llvm.llrint on <1 x double>: lowers to a single llrint libcall; i64 result comes back in r0:r1 and is packed into d0 (BE variants lane-swap via vrev64.32)
+; LE-LABEL: llrint_v1i64_v1f64:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r11, lr}
+; LE-NEXT: push {r11, lr}
+; LE-NEXT: bl llrint
+; LE-NEXT: vmov.32 d0[0], r0
+; LE-NEXT: vmov.32 d0[1], r1
+; LE-NEXT: pop {r11, pc}
+;
+; LE-NEON-LABEL: llrint_v1i64_v1f64:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r11, lr}
+; LE-NEON-NEXT: push {r11, lr}
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vmov.32 d0[0], r0
+; LE-NEON-NEXT: vmov.32 d0[1], r1
+; LE-NEON-NEXT: pop {r11, pc}
+;
+; BE-LABEL: llrint_v1i64_v1f64:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r11, lr}
+; BE-NEXT: push {r11, lr}
+; BE-NEXT: bl llrint
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: vmov.32 d16[1], r1
+; BE-NEXT: vrev64.32 d0, d16
+; BE-NEXT: pop {r11, pc}
+;
+; BE-NEON-LABEL: llrint_v1i64_v1f64:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r11, lr}
+; BE-NEON-NEXT: push {r11, lr}
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: vmov.32 d16[1], r1
+; BE-NEON-NEXT: vrev64.32 d0, d16
+; BE-NEON-NEXT: pop {r11, pc}
+ %a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x) ; single-element vector: scalarized into one llrint call
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>) ; intrinsic declaration matched by the call above
+
+define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { ; llvm.llrint on <2 x double>: two sequential llrint libcalls; r4 holds the first result's high word across the second call; BE variants finish with vrev64.32 q0, q5
+; LE-LABEL: llrint_v2i64_v2f64:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r4, lr}
+; LE-NEXT: push {r4, lr}
+; LE-NEXT: .vsave {d8, d9, d10, d11}
+; LE-NEXT: vpush {d8, d9, d10, d11}
+; LE-NEXT: vorr q4, q0, q0
+; LE-NEXT: vorr d0, d9, d9
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d8, d8
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: vmov.32 d11[1], r4
+; LE-NEXT: vmov.32 d10[1], r1
+; LE-NEXT: vorr q0, q5, q5
+; LE-NEXT: vpop {d8, d9, d10, d11}
+; LE-NEXT: pop {r4, pc}
+;
+; LE-NEON-LABEL: llrint_v2i64_v2f64:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r4, lr}
+; LE-NEON-NEXT: push {r4, lr}
+; LE-NEON-NEXT: .vsave {d8, d9, d10, d11}
+; LE-NEON-NEXT: vpush {d8, d9, d10, d11}
+; LE-NEON-NEXT: vorr q4, q0, q0
+; LE-NEON-NEXT: vorr d0, d9, d9
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d8, d8
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: vmov.32 d11[1], r4
+; LE-NEON-NEXT: vmov.32 d10[1], r1
+; LE-NEON-NEXT: vorr q0, q5, q5
+; LE-NEON-NEXT: vpop {d8, d9, d10, d11}
+; LE-NEON-NEXT: pop {r4, pc}
+;
+; BE-LABEL: llrint_v2i64_v2f64:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r4, lr}
+; BE-NEXT: push {r4, lr}
+; BE-NEXT: .vsave {d8, d9, d10, d11}
+; BE-NEXT: vpush {d8, d9, d10, d11}
+; BE-NEXT: vorr q4, q0, q0
+; BE-NEXT: vorr d0, d9, d9
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d8, d8
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: vmov.32 d11[1], r4
+; BE-NEXT: vmov.32 d10[1], r1
+; BE-NEXT: vrev64.32 q0, q5
+; BE-NEXT: vpop {d8, d9, d10, d11}
+; BE-NEXT: pop {r4, pc}
+;
+; BE-NEON-LABEL: llrint_v2i64_v2f64:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r4, lr}
+; BE-NEON-NEXT: push {r4, lr}
+; BE-NEON-NEXT: .vsave {d8, d9, d10, d11}
+; BE-NEON-NEXT: vpush {d8, d9, d10, d11}
+; BE-NEON-NEXT: vorr q4, q0, q0
+; BE-NEON-NEXT: vorr d0, d9, d9
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d8, d8
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: vmov.32 d11[1], r4
+; BE-NEON-NEXT: vmov.32 d10[1], r1
+; BE-NEON-NEXT: vrev64.32 q0, q5
+; BE-NEON-NEXT: vpop {d8, d9, d10, d11}
+; BE-NEON-NEXT: pop {r4, pc}
+ %a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x) ; scalarized: one bl llrint per element in the checks above
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>) ; intrinsic declaration matched by the call above
+
+define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { ; llvm.llrint on <4 x double>: four sequential llrint libcalls; high words of earlier results survive in r4-r6 across later calls; BE variants lane-swap both result q-registers with vrev64.32
+; LE-LABEL: llrint_v4i64_v4f64:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r4, r5, r6, lr}
+; LE-NEXT: push {r4, r5, r6, lr}
+; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: vorr q5, q1, q1
+; LE-NEXT: vorr q6, q0, q0
+; LE-NEXT: vorr d0, d11, d11
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d12, d12
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d13, d13
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d10, d10
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: vmov.32 d15[1], r6
+; LE-NEXT: vmov.32 d9[1], r4
+; LE-NEXT: vmov.32 d14[1], r5
+; LE-NEXT: vmov.32 d8[1], r1
+; LE-NEXT: vorr q0, q7, q7
+; LE-NEXT: vorr q1, q4, q4
+; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: pop {r4, r5, r6, pc}
+;
+; LE-NEON-LABEL: llrint_v4i64_v4f64:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r4, r5, r6, lr}
+; LE-NEON-NEXT: push {r4, r5, r6, lr}
+; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: vorr q5, q1, q1
+; LE-NEON-NEXT: vorr q6, q0, q0
+; LE-NEON-NEXT: vorr d0, d11, d11
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d12, d12
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d13, d13
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d10, d10
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: vmov.32 d15[1], r6
+; LE-NEON-NEXT: vmov.32 d9[1], r4
+; LE-NEON-NEXT: vmov.32 d14[1], r5
+; LE-NEON-NEXT: vmov.32 d8[1], r1
+; LE-NEON-NEXT: vorr q0, q7, q7
+; LE-NEON-NEXT: vorr q1, q4, q4
+; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: pop {r4, r5, r6, pc}
+;
+; BE-LABEL: llrint_v4i64_v4f64:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r4, r5, r6, lr}
+; BE-NEXT: push {r4, r5, r6, lr}
+; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: vorr q4, q1, q1
+; BE-NEXT: vorr q5, q0, q0
+; BE-NEXT: vorr d0, d9, d9
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d10, d10
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d11, d11
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d8, d8
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: vmov.32 d15[1], r6
+; BE-NEXT: vmov.32 d13[1], r4
+; BE-NEXT: vmov.32 d14[1], r5
+; BE-NEXT: vmov.32 d12[1], r1
+; BE-NEXT: vrev64.32 q0, q7
+; BE-NEXT: vrev64.32 q1, q6
+; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: pop {r4, r5, r6, pc}
+;
+; BE-NEON-LABEL: llrint_v4i64_v4f64:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r4, r5, r6, lr}
+; BE-NEON-NEXT: push {r4, r5, r6, lr}
+; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: vorr q4, q1, q1
+; BE-NEON-NEXT: vorr q5, q0, q0
+; BE-NEON-NEXT: vorr d0, d9, d9
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d10, d10
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d11, d11
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d8, d8
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: vmov.32 d15[1], r6
+; BE-NEON-NEXT: vmov.32 d13[1], r4
+; BE-NEON-NEXT: vmov.32 d14[1], r5
+; BE-NEON-NEXT: vmov.32 d12[1], r1
+; BE-NEON-NEXT: vrev64.32 q0, q7
+; BE-NEON-NEXT: vrev64.32 q1, q6
+; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: pop {r4, r5, r6, pc}
+ %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x) ; scalarized: one bl llrint per element in the checks above
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>) ; intrinsic declaration matched by the call above
+
+define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
+; LE-LABEL: llrint_v8i64_v8f64:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: .pad #40
+; LE-NEXT: sub sp, sp, #40
+; LE-NEXT: vorr q4, q0, q0
+; LE-NEXT: add lr, sp, #24
+; LE-NEXT: vorr d0, d7, d7
+; LE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-NEXT: vorr q7, q2, q2
+; LE-NEXT: vorr q6, q1, q1
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d14, d14
+; LE-NEXT: add lr, sp, #8
+; LE-NEXT: vmov.32 d17[0], r0
+; LE-NEXT: mov r8, r1
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d15, d15
+; LE-NEXT: mov r9, r1
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d12, d12
+; LE-NEXT: mov r10, r1
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d13, d13
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d8, d8
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d9, d9
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: add lr, sp, #24
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-NEXT: bl llrint
+; LE-NEXT: add lr, sp, #8
+; LE-NEXT: vmov.32 d13[1], r6
+; LE-NEXT: vldmia lr, {d6, d7} @ 16-byte Reload
+; LE-NEXT: vmov.32 d15[1], r4
+; LE-NEXT: vmov.32 d11[1], r10
+; LE-NEXT: vmov.32 d6[0], r0
+; LE-NEXT: vmov.32 d12[1], r5
+; LE-NEXT: vmov.32 d14[1], r7
+; LE-NEXT: vorr q0, q6, q6
+; LE-NEXT: vmov.32 d10[1], r9
+; LE-NEXT: vorr q1, q7, q7
+; LE-NEXT: vmov.32 d7[1], r8
+; LE-NEXT: vorr q2, q5, q5
+; LE-NEXT: vmov.32 d6[1], r1
+; LE-NEXT: add sp, sp, #40
+; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-NEON-LABEL: llrint_v8i64_v8f64:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: .pad #40
+; LE-NEON-NEXT: sub sp, sp, #40
+; LE-NEON-NEXT: vorr q4, q0, q0
+; LE-NEON-NEXT: add lr, sp, #24
+; LE-NEON-NEXT: vorr d0, d7, d7
+; LE-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-NEON-NEXT: vorr q7, q2, q2
+; LE-NEON-NEXT: vorr q6, q1, q1
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d14, d14
+; LE-NEON-NEXT: add lr, sp, #8
+; LE-NEON-NEXT: vmov.32 d17[0], r0
+; LE-NEON-NEXT: mov r8, r1
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d15, d15
+; LE-NEON-NEXT: mov r9, r1
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d12, d12
+; LE-NEON-NEXT: mov r10, r1
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d13, d13
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d8, d8
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d9, d9
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: add lr, sp, #24
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: add lr, sp, #8
+; LE-NEON-NEXT: vmov.32 d13[1], r6
+; LE-NEON-NEXT: vldmia lr, {d6, d7} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d15[1], r4
+; LE-NEON-NEXT: vmov.32 d11[1], r10
+; LE-NEON-NEXT: vmov.32 d6[0], r0
+; LE-NEON-NEXT: vmov.32 d12[1], r5
+; LE-NEON-NEXT: vmov.32 d14[1], r7
+; LE-NEON-NEXT: vorr q0, q6, q6
+; LE-NEON-NEXT: vmov.32 d10[1], r9
+; LE-NEON-NEXT: vorr q1, q7, q7
+; LE-NEON-NEXT: vmov.32 d7[1], r8
+; LE-NEON-NEXT: vorr q2, q5, q5
+; LE-NEON-NEXT: vmov.32 d6[1], r1
+; LE-NEON-NEXT: add sp, sp, #40
+; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-LABEL: llrint_v8i64_v8f64:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: .pad #40
+; BE-NEXT: sub sp, sp, #40
+; BE-NEXT: vorr q4, q0, q0
+; BE-NEXT: add lr, sp, #24
+; BE-NEXT: vorr d0, d7, d7
+; BE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-NEXT: vorr q7, q2, q2
+; BE-NEXT: vorr q6, q1, q1
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d14, d14
+; BE-NEXT: add lr, sp, #8
+; BE-NEXT: vmov.32 d17[0], r0
+; BE-NEXT: mov r8, r1
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d15, d15
+; BE-NEXT: mov r9, r1
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d12, d12
+; BE-NEXT: mov r10, r1
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d13, d13
+; BE-NEXT: mov r7, r1
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d8, d8
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d9, d9
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: add lr, sp, #24
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-NEXT: bl llrint
+; BE-NEXT: add lr, sp, #8
+; BE-NEXT: vmov.32 d13[1], r6
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: vmov.32 d15[1], r4
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: vmov.32 d11[1], r10
+; BE-NEXT: vmov.32 d17[1], r8
+; BE-NEXT: vmov.32 d12[1], r5
+; BE-NEXT: vmov.32 d14[1], r7
+; BE-NEXT: vmov.32 d10[1], r9
+; BE-NEXT: vmov.32 d16[1], r1
+; BE-NEXT: vrev64.32 q0, q6
+; BE-NEXT: vrev64.32 q1, q7
+; BE-NEXT: vrev64.32 q2, q5
+; BE-NEXT: vrev64.32 q3, q8
+; BE-NEXT: add sp, sp, #40
+; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-NEON-LABEL: llrint_v8i64_v8f64:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: .pad #40
+; BE-NEON-NEXT: sub sp, sp, #40
+; BE-NEON-NEXT: vorr q4, q0, q0
+; BE-NEON-NEXT: add lr, sp, #24
+; BE-NEON-NEXT: vorr d0, d7, d7
+; BE-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-NEON-NEXT: vorr q7, q2, q2
+; BE-NEON-NEXT: vorr q6, q1, q1
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d14, d14
+; BE-NEON-NEXT: add lr, sp, #8
+; BE-NEON-NEXT: vmov.32 d17[0], r0
+; BE-NEON-NEXT: mov r8, r1
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d15, d15
+; BE-NEON-NEXT: mov r9, r1
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d12, d12
+; BE-NEON-NEXT: mov r10, r1
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d13, d13
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d8, d8
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d9, d9
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: add lr, sp, #24
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: add lr, sp, #8
+; BE-NEON-NEXT: vmov.32 d13[1], r6
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: vmov.32 d15[1], r4
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: vmov.32 d11[1], r10
+; BE-NEON-NEXT: vmov.32 d17[1], r8
+; BE-NEON-NEXT: vmov.32 d12[1], r5
+; BE-NEON-NEXT: vmov.32 d14[1], r7
+; BE-NEON-NEXT: vmov.32 d10[1], r9
+; BE-NEON-NEXT: vmov.32 d16[1], r1
+; BE-NEON-NEXT: vrev64.32 q0, q6
+; BE-NEON-NEXT: vrev64.32 q1, q7
+; BE-NEON-NEXT: vrev64.32 q2, q5
+; BE-NEON-NEXT: vrev64.32 q3, q8
+; BE-NEON-NEXT: add sp, sp, #40
+; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+ %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
+
+define <16 x i64> @llrint_v16f64(<16 x double> %x) {
+; LE-LABEL: llrint_v16f64:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: .pad #4
+; LE-NEXT: sub sp, sp, #4
+; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: .pad #176
+; LE-NEXT: sub sp, sp, #176
+; LE-NEXT: add lr, sp, #40
+; LE-NEXT: str r0, [sp, #140] @ 4-byte Spill
+; LE-NEXT: add r0, sp, #312
+; LE-NEXT: vorr q6, q2, q2
+; LE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #96
+; LE-NEXT: vorr q7, q1, q1
+; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #144
+; LE-NEXT: vorr d0, d1, d1
+; LE-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEXT: add r0, sp, #280
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #80
+; LE-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEXT: add r0, sp, #296
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #120
+; LE-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEXT: add r0, sp, #328
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #56
+; LE-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d14, d14
+; LE-NEXT: str r1, [sp, #116] @ 4-byte Spill
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d15, d15
+; LE-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d12, d12
+; LE-NEXT: add lr, sp, #160
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: str r1, [sp, #72] @ 4-byte Spill
+; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d13, d13
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: add lr, sp, #40
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT: vorr d0, d8, d8
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d9, d9
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: add lr, sp, #96
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-NEXT: bl llrint
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: add lr, sp, #40
+; LE-NEXT: mov r10, r1
+; LE-NEXT: vmov.32 d13[1], r5
+; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #56
+; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT: vorr d0, d9, d9
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d8, d8
+; LE-NEXT: vmov.32 d12[1], r7
+; LE-NEXT: add lr, sp, #96
+; LE-NEXT: mov r9, r1
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: bl llrint
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: add lr, sp, #24
+; LE-NEXT: mov r11, r1
+; LE-NEXT: vmov.32 d15[1], r4
+; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #144
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vorr d0, d17, d17
+; LE-NEXT: bl llrint
+; LE-NEXT: add lr, sp, #8
+; LE-NEXT: vmov.32 d14[1], r6
+; LE-NEXT: mov r8, r1
+; LE-NEXT: vmov.32 d17[0], r0
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #56
+; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #80
+; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT: vorr d0, d11, d11
+; LE-NEXT: bl llrint
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: add lr, sp, #160
+; LE-NEXT: vorr d0, d10, d10
+; LE-NEXT: ldr r0, [sp, #72] @ 4-byte Reload
+; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d9[1], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; LE-NEXT: add lr, sp, #160
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d8[1], r0
+; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #120
+; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT: vorr d0, d11, d11
+; LE-NEXT: bl llrint
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: add lr, sp, #40
+; LE-NEXT: vorr d0, d10, d10
+; LE-NEXT: ldr r0, [sp, #116] @ 4-byte Reload
+; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vmov.32 d9[1], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: add lr, sp, #144
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-NEXT: vmov.32 d8[1], r10
+; LE-NEXT: bl llrint
+; LE-NEXT: add lr, sp, #8
+; LE-NEXT: vmov.32 d15[1], r6
+; LE-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #24
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #160
+; LE-NEXT: vmov.32 d20[0], r0
+; LE-NEXT: vmov.32 d21[1], r8
+; LE-NEXT: vmov.32 d20[1], r1
+; LE-NEXT: ldr r1, [sp, #140] @ 4-byte Reload
+; LE-NEXT: vmov.32 d13[1], r5
+; LE-NEXT: mov r0, r1
+; LE-NEXT: vst1.64 {d8, d9}, [r0:128]!
+; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #56
+; LE-NEXT: vmov.32 d14[1], r4
+; LE-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #96
+; LE-NEXT: vmov.32 d12[1], r7
+; LE-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT: vmov.32 d17[1], r9
+; LE-NEXT: vst1.64 {d18, d19}, [r0:128]
+; LE-NEXT: add r0, r1, #64
+; LE-NEXT: vst1.64 {d14, d15}, [r0:128]!
+; LE-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-NEXT: vmov.32 d16[1], r11
+; LE-NEXT: vst1.64 {d20, d21}, [r0:128]!
+; LE-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-NEXT: add sp, sp, #176
+; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: add sp, sp, #4
+; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v16f64:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: .pad #4
+; LE-NEON-NEXT: sub sp, sp, #4
+; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: .pad #176
+; LE-NEON-NEXT: sub sp, sp, #176
+; LE-NEON-NEXT: add lr, sp, #40
+; LE-NEON-NEXT: str r0, [sp, #140] @ 4-byte Spill
+; LE-NEON-NEXT: add r0, sp, #312
+; LE-NEON-NEXT: vorr q6, q2, q2
+; LE-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #96
+; LE-NEON-NEXT: vorr q7, q1, q1
+; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #144
+; LE-NEON-NEXT: vorr d0, d1, d1
+; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT: add r0, sp, #280
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #80
+; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT: add r0, sp, #296
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #120
+; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT: add r0, sp, #328
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #56
+; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d14, d14
+; LE-NEON-NEXT: str r1, [sp, #116] @ 4-byte Spill
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d15, d15
+; LE-NEON-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d12, d12
+; LE-NEON-NEXT: add lr, sp, #160
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: str r1, [sp, #72] @ 4-byte Spill
+; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d13, d13
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: add lr, sp, #40
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT: vorr d0, d8, d8
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d9, d9
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: add lr, sp, #96
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: add lr, sp, #40
+; LE-NEON-NEXT: mov r10, r1
+; LE-NEON-NEXT: vmov.32 d13[1], r5
+; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #56
+; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT: vorr d0, d9, d9
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d8, d8
+; LE-NEON-NEXT: vmov.32 d12[1], r7
+; LE-NEON-NEXT: add lr, sp, #96
+; LE-NEON-NEXT: mov r9, r1
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: add lr, sp, #24
+; LE-NEON-NEXT: mov r11, r1
+; LE-NEON-NEXT: vmov.32 d15[1], r4
+; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #144
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vorr d0, d17, d17
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: add lr, sp, #8
+; LE-NEON-NEXT: vmov.32 d14[1], r6
+; LE-NEON-NEXT: mov r8, r1
+; LE-NEON-NEXT: vmov.32 d17[0], r0
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #56
+; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #80
+; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT: vorr d0, d11, d11
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: add lr, sp, #160
+; LE-NEON-NEXT: vorr d0, d10, d10
+; LE-NEON-NEXT: ldr r0, [sp, #72] @ 4-byte Reload
+; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d9[1], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; LE-NEON-NEXT: add lr, sp, #160
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d8[1], r0
+; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #120
+; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT: vorr d0, d11, d11
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: add lr, sp, #40
+; LE-NEON-NEXT: vorr d0, d10, d10
+; LE-NEON-NEXT: ldr r0, [sp, #116] @ 4-byte Reload
+; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vmov.32 d9[1], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: add lr, sp, #144
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-NEON-NEXT: vmov.32 d8[1], r10
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: add lr, sp, #8
+; LE-NEON-NEXT: vmov.32 d15[1], r6
+; LE-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #24
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #160
+; LE-NEON-NEXT: vmov.32 d20[0], r0
+; LE-NEON-NEXT: vmov.32 d21[1], r8
+; LE-NEON-NEXT: vmov.32 d20[1], r1
+; LE-NEON-NEXT: ldr r1, [sp, #140] @ 4-byte Reload
+; LE-NEON-NEXT: vmov.32 d13[1], r5
+; LE-NEON-NEXT: mov r0, r1
+; LE-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]!
+; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #56
+; LE-NEON-NEXT: vmov.32 d14[1], r4
+; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #96
+; LE-NEON-NEXT: vmov.32 d12[1], r7
+; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d17[1], r9
+; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]
+; LE-NEON-NEXT: add r0, r1, #64
+; LE-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]!
+; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-NEON-NEXT: vmov.32 d16[1], r11
+; LE-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]!
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-NEON-NEXT: add sp, sp, #176
+; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: add sp, sp, #4
+; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v16f64:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: .pad #4
+; BE-NEXT: sub sp, sp, #4
+; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: .pad #168
+; BE-NEXT: sub sp, sp, #168
+; BE-NEXT: add lr, sp, #64
+; BE-NEXT: str r0, [sp, #132] @ 4-byte Spill
+; BE-NEXT: add r0, sp, #304
+; BE-NEXT: vorr q4, q3, q3
+; BE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #48
+; BE-NEXT: vorr d0, d1, d1
+; BE-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEXT: add r0, sp, #320
+; BE-NEXT: vorr q6, q2, q2
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #88
+; BE-NEXT: vorr q7, q1, q1
+; BE-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEXT: add r0, sp, #272
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #112
+; BE-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEXT: add r0, sp, #288
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #24
+; BE-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d14, d14
+; BE-NEXT: add lr, sp, #136
+; BE-NEXT: vmov.32 d17[0], r0
+; BE-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d15, d15
+; BE-NEXT: str r1, [sp, #84] @ 4-byte Spill
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d12, d12
+; BE-NEXT: add lr, sp, #152
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d13, d13
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d8, d8
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d9, d9
+; BE-NEXT: mov r7, r1
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: add lr, sp, #64
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-NEXT: bl llrint
+; BE-NEXT: add lr, sp, #136
+; BE-NEXT: mov r9, r1
+; BE-NEXT: vmov.32 d13[1], r5
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #24
+; BE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEXT: vorr d0, d9, d9
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d8, d8
+; BE-NEXT: vmov.32 d12[1], r7
+; BE-NEXT: add lr, sp, #64
+; BE-NEXT: mov r10, r1
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEXT: bl llrint
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: add lr, sp, #8
+; BE-NEXT: mov r11, r1
+; BE-NEXT: vmov.32 d11[1], r4
+; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #48
+; BE-NEXT: vorr q6, q5, q5
+; BE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEXT: vorr d0, d9, d9
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d8, d8
+; BE-NEXT: vmov.32 d12[1], r6
+; BE-NEXT: add lr, sp, #24
+; BE-NEXT: mov r8, r1
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEXT: bl llrint
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: add lr, sp, #48
+; BE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #152
+; BE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #88
+; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEXT: vorr d0, d13, d13
+; BE-NEXT: vmov.32 d9[1], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: ldr r0, [sp, #84] @ 4-byte Reload
+; BE-NEXT: vorr d0, d12, d12
+; BE-NEXT: add lr, sp, #152
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d8[1], r0
+; BE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-NEXT: bl llrint
+; BE-NEXT: add lr, sp, #136
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #112
+; BE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEXT: vorr d0, d9, d9
+; BE-NEXT: vmov.32 d11[1], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d8, d8
+; BE-NEXT: mov r7, r1
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: vmov.32 d10[1], r9
+; BE-NEXT: bl llrint
+; BE-NEXT: add lr, sp, #8
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #48
+; BE-NEXT: vmov.32 d17[1], r10
+; BE-NEXT: vmov.32 d16[1], r11
+; BE-NEXT: vorr q12, q8, q8
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #152
+; BE-NEXT: vmov.32 d17[1], r8
+; BE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #24
+; BE-NEXT: vmov.32 d13[1], r7
+; BE-NEXT: vmov.32 d16[1], r6
+; BE-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #64
+; BE-NEXT: vorr q13, q8, q8
+; BE-NEXT: vmov.32 d12[1], r1
+; BE-NEXT: ldr r1, [sp, #132] @ 4-byte Reload
+; BE-NEXT: vrev64.32 q8, q5
+; BE-NEXT: mov r0, r1
+; BE-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-NEXT: vrev64.32 q9, q9
+; BE-NEXT: vrev64.32 q10, q10
+; BE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; BE-NEXT: vrev64.32 q11, q11
+; BE-NEXT: vmov.32 d15[1], r4
+; BE-NEXT: vst1.64 {d20, d21}, [r0:128]!
+; BE-NEXT: vrev64.32 q15, q6
+; BE-NEXT: vmov.32 d14[1], r5
+; BE-NEXT: vrev64.32 q12, q12
+; BE-NEXT: vst1.64 {d22, d23}, [r0:128]
+; BE-NEXT: add r0, r1, #64
+; BE-NEXT: vrev64.32 q13, q13
+; BE-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-NEXT: vst1.64 {d24, d25}, [r0:128]!
+; BE-NEXT: vrev64.32 q14, q7
+; BE-NEXT: vst1.64 {d26, d27}, [r0:128]!
+; BE-NEXT: vst1.64 {d28, d29}, [r0:128]
+; BE-NEXT: add sp, sp, #168
+; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: add sp, sp, #4
+; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v16f64:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: .pad #4
+; BE-NEON-NEXT: sub sp, sp, #4
+; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: .pad #168
+; BE-NEON-NEXT: sub sp, sp, #168
+; BE-NEON-NEXT: add lr, sp, #64
+; BE-NEON-NEXT: str r0, [sp, #132] @ 4-byte Spill
+; BE-NEON-NEXT: add r0, sp, #304
+; BE-NEON-NEXT: vorr q4, q3, q3
+; BE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #48
+; BE-NEON-NEXT: vorr d0, d1, d1
+; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT: add r0, sp, #320
+; BE-NEON-NEXT: vorr q6, q2, q2
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #88
+; BE-NEON-NEXT: vorr q7, q1, q1
+; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT: add r0, sp, #272
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #112
+; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT: add r0, sp, #288
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #24
+; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d14, d14
+; BE-NEON-NEXT: add lr, sp, #136
+; BE-NEON-NEXT: vmov.32 d17[0], r0
+; BE-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d15, d15
+; BE-NEON-NEXT: str r1, [sp, #84] @ 4-byte Spill
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d12, d12
+; BE-NEON-NEXT: add lr, sp, #152
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d13, d13
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d8, d8
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d9, d9
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: add lr, sp, #64
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: add lr, sp, #136
+; BE-NEON-NEXT: mov r9, r1
+; BE-NEON-NEXT: vmov.32 d13[1], r5
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #24
+; BE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEON-NEXT: vorr d0, d9, d9
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d8, d8
+; BE-NEON-NEXT: vmov.32 d12[1], r7
+; BE-NEON-NEXT: add lr, sp, #64
+; BE-NEON-NEXT: mov r10, r1
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: add lr, sp, #8
+; BE-NEON-NEXT: mov r11, r1
+; BE-NEON-NEXT: vmov.32 d11[1], r4
+; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #48
+; BE-NEON-NEXT: vorr q6, q5, q5
+; BE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEON-NEXT: vorr d0, d9, d9
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d8, d8
+; BE-NEON-NEXT: vmov.32 d12[1], r6
+; BE-NEON-NEXT: add lr, sp, #24
+; BE-NEON-NEXT: mov r8, r1
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: add lr, sp, #48
+; BE-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #152
+; BE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #88
+; BE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEON-NEXT: vorr d0, d13, d13
+; BE-NEON-NEXT: vmov.32 d9[1], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #84] @ 4-byte Reload
+; BE-NEON-NEXT: vorr d0, d12, d12
+; BE-NEON-NEXT: add lr, sp, #152
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d8[1], r0
+; BE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: add lr, sp, #136
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #112
+; BE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEON-NEXT: vorr d0, d9, d9
+; BE-NEON-NEXT: vmov.32 d11[1], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d8, d8
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: vmov.32 d10[1], r9
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: add lr, sp, #8
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #48
+; BE-NEON-NEXT: vmov.32 d17[1], r10
+; BE-NEON-NEXT: vmov.32 d16[1], r11
+; BE-NEON-NEXT: vorr q12, q8, q8
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #152
+; BE-NEON-NEXT: vmov.32 d17[1], r8
+; BE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #24
+; BE-NEON-NEXT: vmov.32 d13[1], r7
+; BE-NEON-NEXT: vmov.32 d16[1], r6
+; BE-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #64
+; BE-NEON-NEXT: vorr q13, q8, q8
+; BE-NEON-NEXT: vmov.32 d12[1], r1
+; BE-NEON-NEXT: ldr r1, [sp, #132] @ 4-byte Reload
+; BE-NEON-NEXT: vrev64.32 q8, q5
+; BE-NEON-NEXT: mov r0, r1
+; BE-NEON-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-NEON-NEXT: vrev64.32 q9, q9
+; BE-NEON-NEXT: vrev64.32 q10, q10
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; BE-NEON-NEXT: vrev64.32 q11, q11
+; BE-NEON-NEXT: vmov.32 d15[1], r4
+; BE-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]!
+; BE-NEON-NEXT: vrev64.32 q15, q6
+; BE-NEON-NEXT: vmov.32 d14[1], r5
+; BE-NEON-NEXT: vrev64.32 q12, q12
+; BE-NEON-NEXT: vst1.64 {d22, d23}, [r0:128]
+; BE-NEON-NEXT: add r0, r1, #64
+; BE-NEON-NEXT: vrev64.32 q13, q13
+; BE-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-NEON-NEXT: vst1.64 {d24, d25}, [r0:128]!
+; BE-NEON-NEXT: vrev64.32 q14, q7
+; BE-NEON-NEXT: vst1.64 {d26, d27}, [r0:128]!
+; BE-NEON-NEXT: vst1.64 {d28, d29}, [r0:128]
+; BE-NEON-NEXT: add sp, sp, #168
+; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: add sp, sp, #4
+; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> %x)
+ ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>)
+
+define <32 x i64> @llrint_v32f64(<32 x double> %x) {
+; LE-LABEL: llrint_v32f64:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: .pad #4
+; LE-NEXT: sub sp, sp, #4
+; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: .pad #208
+; LE-NEXT: sub sp, sp, #208
+; LE-NEXT: add lr, sp, #72
+; LE-NEXT: str r0, [sp, #156] @ 4-byte Spill
+; LE-NEXT: add r0, sp, #456
+; LE-NEXT: vorr q4, q0, q0
+; LE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #56
+; LE-NEXT: vorr d0, d7, d7
+; LE-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #104
+; LE-NEXT: vorr q5, q2, q2
+; LE-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEXT: add r0, sp, #344
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #192
+; LE-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEXT: add r0, sp, #376
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #160
+; LE-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEXT: add r0, sp, #360
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #136
+; LE-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEXT: add r0, sp, #440
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #176
+; LE-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d10, d10
+; LE-NEXT: str r1, [sp, #120] @ 4-byte Spill
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d11, d11
+; LE-NEXT: mov r10, r1
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d8, d8
+; LE-NEXT: add lr, sp, #88
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: mov r11, r1
+; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d9, d9
+; LE-NEXT: mov r9, r1
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: add lr, sp, #40
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #56
+; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT: vorr d0, d10, d10
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d11, d11
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: add lr, sp, #72
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-NEXT: bl llrint
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: add lr, sp, #56
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vmov.32 d9[1], r7
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #176
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vorr d0, d17, d17
+; LE-NEXT: bl llrint
+; LE-NEXT: vmov.32 d8[1], r4
+; LE-NEXT: add lr, sp, #72
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #104
+; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT: vorr d0, d9, d9
+; LE-NEXT: bl llrint
+; LE-NEXT: add lr, sp, #40
+; LE-NEXT: vorr d0, d8, d8
+; LE-NEXT: mov r8, r1
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT: vmov.32 d11[1], r6
+; LE-NEXT: bl llrint
+; LE-NEXT: add lr, sp, #176
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-NEXT: vmov.32 d10[1], r9
+; LE-NEXT: bl llrint
+; LE-NEXT: add lr, sp, #88
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: ldr r0, [sp, #120] @ 4-byte Reload
+; LE-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #56
+; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #24
+; LE-NEXT: vmov.32 d19[1], r0
+; LE-NEXT: add r0, sp, #408
+; LE-NEXT: ldr r2, [sp, #156] @ 4-byte Reload
+; LE-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEXT: vmov.32 d13[1], r7
+; LE-NEXT: mov r0, r2
+; LE-NEXT: vmov.32 d12[1], r1
+; LE-NEXT: add r1, sp, #488
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #40
+; LE-NEXT: vld1.64 {d16, d17}, [r1]
+; LE-NEXT: add r1, sp, #472
+; LE-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #72
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #56
+; LE-NEXT: vmov.32 d21[1], r11
+; LE-NEXT: vmov.32 d20[1], r10
+; LE-NEXT: add r10, r2, #192
+; LE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-NEXT: vld1.64 {d16, d17}, [r1]
+; LE-NEXT: add r1, sp, #392
+; LE-NEXT: vmov.32 d18[1], r5
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #72
+; LE-NEXT: vst1.64 {d20, d21}, [r0:128]!
+; LE-NEXT: vld1.64 {d16, d17}, [r1]
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #104
+; LE-NEXT: vst1.64 {d18, d19}, [r0:128]
+; LE-NEXT: add r0, sp, #312
+; LE-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEXT: add r0, sp, #328
+; LE-NEXT: vmov.32 d15[1], r8
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #120
+; LE-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEXT: add r0, sp, #424
+; LE-NEXT: vmov.32 d14[1], r4
+; LE-NEXT: vst1.64 {d12, d13}, [r10:128]!
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #176
+; LE-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEXT: vst1.64 {d14, d15}, [r10:128]!
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #192
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vorr d0, d17, d17
+; LE-NEXT: bl llrint
+; LE-NEXT: add lr, sp, #136
+; LE-NEXT: mov r9, r1
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT: vorr d0, d10, d10
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d11, d11
+; LE-NEXT: mov r8, r1
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: add lr, sp, #160
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT: vorr d0, d10, d10
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d11, d11
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: bl llrint
+; LE-NEXT: add lr, sp, #192
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-NEXT: bl llrint
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: add lr, sp, #192
+; LE-NEXT: mov r11, r1
+; LE-NEXT: vmov.32 d15[1], r4
+; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #176
+; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT: vorr d0, d9, d9
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d8, d8
+; LE-NEXT: vmov.32 d14[1], r6
+; LE-NEXT: add lr, sp, #136
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT: bl llrint
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: add lr, sp, #176
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vmov.32 d13[1], r5
+; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #24
+; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT: vorr d0, d9, d9
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d8, d8
+; LE-NEXT: vmov.32 d12[1], r8
+; LE-NEXT: add lr, sp, #88
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: bl llrint
+; LE-NEXT: add lr, sp, #192
+; LE-NEXT: str r1, [sp, #24] @ 4-byte Spill
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #40
+; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT: vorr d0, d11, d11
+; LE-NEXT: vmov.32 d9[1], r9
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d10, d10
+; LE-NEXT: vmov.32 d8[1], r11
+; LE-NEXT: add lr, sp, #192
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT: bl llrint
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: add lr, sp, #160
+; LE-NEXT: str r1, [sp, #40] @ 4-byte Spill
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #176
+; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #56
+; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT: vorr d0, d9, d9
+; LE-NEXT: vmov.32 d11[1], r4
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d8, d8
+; LE-NEXT: vmov.32 d10[1], r7
+; LE-NEXT: add lr, sp, #176
+; LE-NEXT: mov r8, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT: bl llrint
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: add lr, sp, #8
+; LE-NEXT: mov r11, r1
+; LE-NEXT: vmov.32 d15[1], r5
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #72
+; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT: vorr d0, d9, d9
+; LE-NEXT: bl llrint
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
+; LE-NEXT: vorr d0, d8, d8
+; LE-NEXT: add lr, sp, #56
+; LE-NEXT: mov r9, r1
+; LE-NEXT: vmov.32 d14[1], r0
+; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT: bl llrint
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: add lr, sp, #72
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #160
+; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #104
+; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT: vorr d0, d9, d9
+; LE-NEXT: vmov.32 d13[1], r6
+; LE-NEXT: bl llrint
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; LE-NEXT: vorr d0, d8, d8
+; LE-NEXT: add lr, sp, #160
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d12[1], r0
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: bl llrint
+; LE-NEXT: add lr, sp, #8
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #120
+; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT: vorr d0, d9, d9
+; LE-NEXT: vmov.32 d13[1], r8
+; LE-NEXT: bl llrint
+; LE-NEXT: vorr d0, d8, d8
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: vmov.32 d12[1], r11
+; LE-NEXT: bl llrint
+; LE-NEXT: add lr, sp, #72
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #160
+; LE-NEXT: vmov.32 d17[1], r9
+; LE-NEXT: vmov.32 d16[1], r7
+; LE-NEXT: vst1.64 {d12, d13}, [r10:128]!
+; LE-NEXT: vorr q9, q8, q8
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #136
+; LE-NEXT: vmov.32 d15[1], r5
+; LE-NEXT: vst1.64 {d16, d17}, [r10:128]
+; LE-NEXT: vmov.32 d14[1], r1
+; LE-NEXT: ldr r1, [sp, #156] @ 4-byte Reload
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add r0, r1, #128
+; LE-NEXT: add lr, sp, #56
+; LE-NEXT: vmov.32 d11[1], r6
+; LE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #176
+; LE-NEXT: vmov.32 d10[1], r4
+; LE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #192
+; LE-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-NEXT: add r0, r1, #64
+; LE-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-NEXT: vst1.64 {d14, d15}, [r0:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #88
+; LE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-NEXT: add sp, sp, #208
+; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: add sp, sp, #4
+; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v32f64:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: .pad #4
+; LE-NEON-NEXT: sub sp, sp, #4
+; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: .pad #208
+; LE-NEON-NEXT: sub sp, sp, #208
+; LE-NEON-NEXT: add lr, sp, #72
+; LE-NEON-NEXT: str r0, [sp, #156] @ 4-byte Spill
+; LE-NEON-NEXT: add r0, sp, #456
+; LE-NEON-NEXT: vorr q4, q0, q0
+; LE-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #56
+; LE-NEON-NEXT: vorr d0, d7, d7
+; LE-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #104
+; LE-NEON-NEXT: vorr q5, q2, q2
+; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT: add r0, sp, #344
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #192
+; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT: add r0, sp, #376
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #160
+; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT: add r0, sp, #360
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #136
+; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT: add r0, sp, #440
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #176
+; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d10, d10
+; LE-NEON-NEXT: str r1, [sp, #120] @ 4-byte Spill
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d11, d11
+; LE-NEON-NEXT: mov r10, r1
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d8, d8
+; LE-NEON-NEXT: add lr, sp, #88
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: mov r11, r1
+; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d9, d9
+; LE-NEON-NEXT: mov r9, r1
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: add lr, sp, #40
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #56
+; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT: vorr d0, d10, d10
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d11, d11
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: add lr, sp, #72
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: add lr, sp, #56
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vmov.32 d9[1], r7
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #176
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vorr d0, d17, d17
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vmov.32 d8[1], r4
+; LE-NEON-NEXT: add lr, sp, #72
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #104
+; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT: vorr d0, d9, d9
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: add lr, sp, #40
+; LE-NEON-NEXT: vorr d0, d8, d8
+; LE-NEON-NEXT: mov r8, r1
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d11[1], r6
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: add lr, sp, #176
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-NEON-NEXT: vmov.32 d10[1], r9
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: add lr, sp, #88
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #120] @ 4-byte Reload
+; LE-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #56
+; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #24
+; LE-NEON-NEXT: vmov.32 d19[1], r0
+; LE-NEON-NEXT: add r0, sp, #408
+; LE-NEON-NEXT: ldr r2, [sp, #156] @ 4-byte Reload
+; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT: vmov.32 d13[1], r7
+; LE-NEON-NEXT: mov r0, r2
+; LE-NEON-NEXT: vmov.32 d12[1], r1
+; LE-NEON-NEXT: add r1, sp, #488
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #40
+; LE-NEON-NEXT: vld1.64 {d16, d17}, [r1]
+; LE-NEON-NEXT: add r1, sp, #472
+; LE-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #72
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #56
+; LE-NEON-NEXT: vmov.32 d21[1], r11
+; LE-NEON-NEXT: vmov.32 d20[1], r10
+; LE-NEON-NEXT: add r10, r2, #192
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-NEON-NEXT: vld1.64 {d16, d17}, [r1]
+; LE-NEON-NEXT: add r1, sp, #392
+; LE-NEON-NEXT: vmov.32 d18[1], r5
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #72
+; LE-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]!
+; LE-NEON-NEXT: vld1.64 {d16, d17}, [r1]
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #104
+; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]
+; LE-NEON-NEXT: add r0, sp, #312
+; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT: add r0, sp, #328
+; LE-NEON-NEXT: vmov.32 d15[1], r8
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #120
+; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT: add r0, sp, #424
+; LE-NEON-NEXT: vmov.32 d14[1], r4
+; LE-NEON-NEXT: vst1.64 {d12, d13}, [r10:128]!
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #176
+; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT: vst1.64 {d14, d15}, [r10:128]!
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #192
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vorr d0, d17, d17
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: add lr, sp, #136
+; LE-NEON-NEXT: mov r9, r1
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT: vorr d0, d10, d10
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d11, d11
+; LE-NEON-NEXT: mov r8, r1
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: add lr, sp, #160
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT: vorr d0, d10, d10
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d11, d11
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: add lr, sp, #192
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: add lr, sp, #192
+; LE-NEON-NEXT: mov r11, r1
+; LE-NEON-NEXT: vmov.32 d15[1], r4
+; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #176
+; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT: vorr d0, d9, d9
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d8, d8
+; LE-NEON-NEXT: vmov.32 d14[1], r6
+; LE-NEON-NEXT: add lr, sp, #136
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: add lr, sp, #176
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vmov.32 d13[1], r5
+; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #24
+; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT: vorr d0, d9, d9
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d8, d8
+; LE-NEON-NEXT: vmov.32 d12[1], r8
+; LE-NEON-NEXT: add lr, sp, #88
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: add lr, sp, #192
+; LE-NEON-NEXT: str r1, [sp, #24] @ 4-byte Spill
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #40
+; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT: vorr d0, d11, d11
+; LE-NEON-NEXT: vmov.32 d9[1], r9
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d10, d10
+; LE-NEON-NEXT: vmov.32 d8[1], r11
+; LE-NEON-NEXT: add lr, sp, #192
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: add lr, sp, #160
+; LE-NEON-NEXT: str r1, [sp, #40] @ 4-byte Spill
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #176
+; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #56
+; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT: vorr d0, d9, d9
+; LE-NEON-NEXT: vmov.32 d11[1], r4
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d8, d8
+; LE-NEON-NEXT: vmov.32 d10[1], r7
+; LE-NEON-NEXT: add lr, sp, #176
+; LE-NEON-NEXT: mov r8, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: add lr, sp, #8
+; LE-NEON-NEXT: mov r11, r1
+; LE-NEON-NEXT: vmov.32 d15[1], r5
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #72
+; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT: vorr d0, d9, d9
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
+; LE-NEON-NEXT: vorr d0, d8, d8
+; LE-NEON-NEXT: add lr, sp, #56
+; LE-NEON-NEXT: mov r9, r1
+; LE-NEON-NEXT: vmov.32 d14[1], r0
+; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: add lr, sp, #72
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #160
+; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #104
+; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT: vorr d0, d9, d9
+; LE-NEON-NEXT: vmov.32 d13[1], r6
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; LE-NEON-NEXT: vorr d0, d8, d8
+; LE-NEON-NEXT: add lr, sp, #160
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d12[1], r0
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: add lr, sp, #8
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #120
+; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT: vorr d0, d9, d9
+; LE-NEON-NEXT: vmov.32 d13[1], r8
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: vorr d0, d8, d8
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: vmov.32 d12[1], r11
+; LE-NEON-NEXT: bl llrint
+; LE-NEON-NEXT: add lr, sp, #72
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #160
+; LE-NEON-NEXT: vmov.32 d17[1], r9
+; LE-NEON-NEXT: vmov.32 d16[1], r7
+; LE-NEON-NEXT: vst1.64 {d12, d13}, [r10:128]!
+; LE-NEON-NEXT: vorr q9, q8, q8
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #136
+; LE-NEON-NEXT: vmov.32 d15[1], r5
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]
+; LE-NEON-NEXT: vmov.32 d14[1], r1
+; LE-NEON-NEXT: ldr r1, [sp, #156] @ 4-byte Reload
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add r0, r1, #128
+; LE-NEON-NEXT: add lr, sp, #56
+; LE-NEON-NEXT: vmov.32 d11[1], r6
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #176
+; LE-NEON-NEXT: vmov.32 d10[1], r4
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #192
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-NEON-NEXT: add r0, r1, #64
+; LE-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #88
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-NEON-NEXT: add sp, sp, #208
+; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: add sp, sp, #4
+; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v32f64:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: .pad #4
+; BE-NEXT: sub sp, sp, #4
+; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: .pad #232
+; BE-NEXT: sub sp, sp, #232
+; BE-NEXT: add lr, sp, #184
+; BE-NEXT: str r0, [sp, #148] @ 4-byte Spill
+; BE-NEXT: add r0, sp, #416
+; BE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #168
+; BE-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #152
+; BE-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #128
+; BE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #200
+; BE-NEXT: vld1.64 {d18, d19}, [r0]
+; BE-NEXT: add r0, sp, #448
+; BE-NEXT: vorr d0, d19, d19
+; BE-NEXT: vld1.64 {d14, d15}, [r0]
+; BE-NEXT: add r0, sp, #336
+; BE-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #64
+; BE-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEXT: add r0, sp, #400
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #8
+; BE-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEXT: add r0, sp, #352
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #24
+; BE-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEXT: add r0, sp, #368
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #48
+; BE-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEXT: add r0, sp, #384
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #96
+; BE-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEXT: add r0, sp, #512
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #112
+; BE-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEXT: add r0, sp, #432
+; BE-NEXT: vld1.64 {d8, d9}, [r0]
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d8, d8
+; BE-NEXT: str r1, [sp, #80] @ 4-byte Spill
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d9, d9
+; BE-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d14, d14
+; BE-NEXT: add lr, sp, #216
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: mov r9, r1
+; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d15, d15
+; BE-NEXT: mov r8, r1
+; BE-NEXT: vmov.32 d8[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: add lr, sp, #64
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d9[0], r0
+; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT: vorr d0, d10, d10
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d11, d11
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: add lr, sp, #200
+; BE-NEXT: mov r7, r1
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-NEXT: bl llrint
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: add lr, sp, #200
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vmov.32 d15[1], r7
+; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #8
+; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT: vorr d0, d11, d11
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d10, d10
+; BE-NEXT: vmov.32 d14[1], r6
+; BE-NEXT: add lr, sp, #64
+; BE-NEXT: mov r10, r1
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEXT: bl llrint
+; BE-NEXT: add lr, sp, #24
+; BE-NEXT: mov r11, r1
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-NEXT: vorr d0, d15, d15
+; BE-NEXT: vmov.32 d9[1], r4
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d14, d14
+; BE-NEXT: vmov.32 d8[1], r8
+; BE-NEXT: add lr, sp, #8
+; BE-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-NEXT: bl llrint
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: add lr, sp, #24
+; BE-NEXT: mov r8, r1
+; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #216
+; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #48
+; BE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEXT: vorr d0, d9, d9
+; BE-NEXT: vmov.32 d11[1], r9
+; BE-NEXT: bl llrint
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; BE-NEXT: vorr d0, d8, d8
+; BE-NEXT: add lr, sp, #216
+; BE-NEXT: mov r9, r1
+; BE-NEXT: vmov.32 d10[1], r0
+; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT: bl llrint
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: add lr, sp, #48
+; BE-NEXT: ldr r0, [sp, #80] @ 4-byte Reload
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #200
+; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #96
+; BE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEXT: vorr d0, d9, d9
+; BE-NEXT: vmov.32 d11[1], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d8, d8
+; BE-NEXT: vmov.32 d10[1], r5
+; BE-NEXT: add lr, sp, #200
+; BE-NEXT: mov r7, r1
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT: bl llrint
+; BE-NEXT: add lr, sp, #112
+; BE-NEXT: vorr q4, q6, q6
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEXT: vorr d0, d13, d13
+; BE-NEXT: vmov.32 d9[1], r10
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d12, d12
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: vmov.32 d8[1], r11
+; BE-NEXT: bl llrint
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: add lr, sp, #24
+; BE-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #48
+; BE-NEXT: vmov.32 d17[1], r0
+; BE-NEXT: vmov.32 d16[1], r8
+; BE-NEXT: vorr q9, q8, q8
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #112
+; BE-NEXT: vmov.32 d17[1], r9
+; BE-NEXT: vmov.32 d16[1], r6
+; BE-NEXT: vorr q10, q8, q8
+; BE-NEXT: vrev64.32 q8, q4
+; BE-NEXT: vmov.32 d15[1], r7
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #200
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: vmov.32 d11[1], r5
+; BE-NEXT: vrev64.32 q8, q8
+; BE-NEXT: vmov.32 d14[1], r4
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #216
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: vmov.32 d10[1], r1
+; BE-NEXT: vrev64.32 q8, q8
+; BE-NEXT: vrev64.32 q6, q7
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #8
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #96
+; BE-NEXT: vrev64.32 q7, q5
+; BE-NEXT: vrev64.32 q8, q8
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #64
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #80
+; BE-NEXT: vrev64.32 q8, q8
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #64
+; BE-NEXT: vrev64.32 q8, q9
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #48
+; BE-NEXT: vrev64.32 q8, q10
+; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT: add lr, sp, #128
+; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT: vorr d0, d11, d11
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d10, d10
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d9[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vmov.32 d8[0], r0
+; BE-NEXT: ldr r6, [sp, #148] @ 4-byte Reload
+; BE-NEXT: add lr, sp, #152
+; BE-NEXT: vmov.32 d9[1], r4
+; BE-NEXT: mov r5, r6
+; BE-NEXT: vmov.32 d8[1], r1
+; BE-NEXT: vrev64.32 q8, q4
+; BE-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT: vorr d0, d11, d11
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d10, d10
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d9[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vmov.32 d8[0], r0
+; BE-NEXT: add lr, sp, #168
+; BE-NEXT: vmov.32 d9[1], r4
+; BE-NEXT: vmov.32 d8[1], r1
+; BE-NEXT: vrev64.32 q8, q4
+; BE-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT: vorr d0, d11, d11
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d10, d10
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d9[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vmov.32 d8[0], r0
+; BE-NEXT: add lr, sp, #184
+; BE-NEXT: vmov.32 d9[1], r4
+; BE-NEXT: vmov.32 d8[1], r1
+; BE-NEXT: vrev64.32 q8, q4
+; BE-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT: vorr d0, d11, d11
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d10, d10
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d9[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vmov.32 d8[0], r0
+; BE-NEXT: add r0, sp, #464
+; BE-NEXT: vmov.32 d9[1], r4
+; BE-NEXT: vmov.32 d8[1], r1
+; BE-NEXT: vrev64.32 q8, q4
+; BE-NEXT: vld1.64 {d8, d9}, [r0]
+; BE-NEXT: vorr d0, d9, d9
+; BE-NEXT: vst1.64 {d16, d17}, [r5:128]
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d8, d8
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: add r0, sp, #480
+; BE-NEXT: add r5, r6, #192
+; BE-NEXT: vmov.32 d11[1], r4
+; BE-NEXT: vmov.32 d10[1], r1
+; BE-NEXT: vld1.64 {d8, d9}, [r0]
+; BE-NEXT: vorr d0, d9, d9
+; BE-NEXT: vrev64.32 q8, q5
+; BE-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d8, d8
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: add r0, sp, #496
+; BE-NEXT: vmov.32 d11[1], r4
+; BE-NEXT: vmov.32 d10[1], r1
+; BE-NEXT: vld1.64 {d8, d9}, [r0]
+; BE-NEXT: vorr d0, d9, d9
+; BE-NEXT: vrev64.32 q8, q5
+; BE-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT: bl llrint
+; BE-NEXT: vorr d0, d8, d8
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: bl llrint
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: add lr, sp, #112
+; BE-NEXT: add r0, r6, #128
+; BE-NEXT: vmov.32 d11[1], r4
+; BE-NEXT: vmov.32 d10[1], r1
+; BE-NEXT: vrev64.32 q8, q5
+; BE-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT: vst1.64 {d14, d15}, [r5:128]
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #200
+; BE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #216
+; BE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #96
+; BE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #80
+; BE-NEXT: vst1.64 {d16, d17}, [r0:128]
+; BE-NEXT: add r0, r6, #64
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #64
+; BE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: add lr, sp, #48
+; BE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT: vst1.64 {d12, d13}, [r0:128]
+; BE-NEXT: add sp, sp, #232
+; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: add sp, sp, #4
+; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v32f64:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: .pad #4
+; BE-NEON-NEXT: sub sp, sp, #4
+; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: .pad #232
+; BE-NEON-NEXT: sub sp, sp, #232
+; BE-NEON-NEXT: add lr, sp, #184
+; BE-NEON-NEXT: str r0, [sp, #148] @ 4-byte Spill
+; BE-NEON-NEXT: add r0, sp, #416
+; BE-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #168
+; BE-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #152
+; BE-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #128
+; BE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #200
+; BE-NEON-NEXT: vld1.64 {d18, d19}, [r0]
+; BE-NEON-NEXT: add r0, sp, #448
+; BE-NEON-NEXT: vorr d0, d19, d19
+; BE-NEON-NEXT: vld1.64 {d14, d15}, [r0]
+; BE-NEON-NEXT: add r0, sp, #336
+; BE-NEON-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #64
+; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT: add r0, sp, #400
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #8
+; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT: add r0, sp, #352
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #24
+; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT: add r0, sp, #368
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #48
+; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT: add r0, sp, #384
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #96
+; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT: add r0, sp, #512
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #112
+; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT: add r0, sp, #432
+; BE-NEON-NEXT: vld1.64 {d8, d9}, [r0]
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d8, d8
+; BE-NEON-NEXT: str r1, [sp, #80] @ 4-byte Spill
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d9, d9
+; BE-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d14, d14
+; BE-NEON-NEXT: add lr, sp, #216
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: mov r9, r1
+; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d15, d15
+; BE-NEON-NEXT: mov r8, r1
+; BE-NEON-NEXT: vmov.32 d8[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: add lr, sp, #64
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d9[0], r0
+; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT: vorr d0, d10, d10
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d11, d11
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: add lr, sp, #200
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: add lr, sp, #200
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vmov.32 d15[1], r7
+; BE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #8
+; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT: vorr d0, d11, d11
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d10, d10
+; BE-NEON-NEXT: vmov.32 d14[1], r6
+; BE-NEON-NEXT: add lr, sp, #64
+; BE-NEON-NEXT: mov r10, r1
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: add lr, sp, #24
+; BE-NEON-NEXT: mov r11, r1
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-NEON-NEXT: vorr d0, d15, d15
+; BE-NEON-NEXT: vmov.32 d9[1], r4
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d14, d14
+; BE-NEON-NEXT: vmov.32 d8[1], r8
+; BE-NEON-NEXT: add lr, sp, #8
+; BE-NEON-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: add lr, sp, #24
+; BE-NEON-NEXT: mov r8, r1
+; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #216
+; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #48
+; BE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEON-NEXT: vorr d0, d9, d9
+; BE-NEON-NEXT: vmov.32 d11[1], r9
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; BE-NEON-NEXT: vorr d0, d8, d8
+; BE-NEON-NEXT: add lr, sp, #216
+; BE-NEON-NEXT: mov r9, r1
+; BE-NEON-NEXT: vmov.32 d10[1], r0
+; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: add lr, sp, #48
+; BE-NEON-NEXT: ldr r0, [sp, #80] @ 4-byte Reload
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #200
+; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #96
+; BE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEON-NEXT: vorr d0, d9, d9
+; BE-NEON-NEXT: vmov.32 d11[1], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d8, d8
+; BE-NEON-NEXT: vmov.32 d10[1], r5
+; BE-NEON-NEXT: add lr, sp, #200
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: add lr, sp, #112
+; BE-NEON-NEXT: vorr q4, q6, q6
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEON-NEXT: vorr d0, d13, d13
+; BE-NEON-NEXT: vmov.32 d9[1], r10
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d12, d12
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: vmov.32 d8[1], r11
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: add lr, sp, #24
+; BE-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #48
+; BE-NEON-NEXT: vmov.32 d17[1], r0
+; BE-NEON-NEXT: vmov.32 d16[1], r8
+; BE-NEON-NEXT: vorr q9, q8, q8
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #112
+; BE-NEON-NEXT: vmov.32 d17[1], r9
+; BE-NEON-NEXT: vmov.32 d16[1], r6
+; BE-NEON-NEXT: vorr q10, q8, q8
+; BE-NEON-NEXT: vrev64.32 q8, q4
+; BE-NEON-NEXT: vmov.32 d15[1], r7
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #200
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: vmov.32 d11[1], r5
+; BE-NEON-NEXT: vrev64.32 q8, q8
+; BE-NEON-NEXT: vmov.32 d14[1], r4
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #216
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: vmov.32 d10[1], r1
+; BE-NEON-NEXT: vrev64.32 q8, q8
+; BE-NEON-NEXT: vrev64.32 q6, q7
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #8
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #96
+; BE-NEON-NEXT: vrev64.32 q7, q5
+; BE-NEON-NEXT: vrev64.32 q8, q8
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #64
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #80
+; BE-NEON-NEXT: vrev64.32 q8, q8
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #64
+; BE-NEON-NEXT: vrev64.32 q8, q9
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #48
+; BE-NEON-NEXT: vrev64.32 q8, q10
+; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT: add lr, sp, #128
+; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT: vorr d0, d11, d11
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d10, d10
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d9[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vmov.32 d8[0], r0
+; BE-NEON-NEXT: ldr r6, [sp, #148] @ 4-byte Reload
+; BE-NEON-NEXT: add lr, sp, #152
+; BE-NEON-NEXT: vmov.32 d9[1], r4
+; BE-NEON-NEXT: mov r5, r6
+; BE-NEON-NEXT: vmov.32 d8[1], r1
+; BE-NEON-NEXT: vrev64.32 q8, q4
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT: vorr d0, d11, d11
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d10, d10
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d9[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vmov.32 d8[0], r0
+; BE-NEON-NEXT: add lr, sp, #168
+; BE-NEON-NEXT: vmov.32 d9[1], r4
+; BE-NEON-NEXT: vmov.32 d8[1], r1
+; BE-NEON-NEXT: vrev64.32 q8, q4
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT: vorr d0, d11, d11
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d10, d10
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d9[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vmov.32 d8[0], r0
+; BE-NEON-NEXT: add lr, sp, #184
+; BE-NEON-NEXT: vmov.32 d9[1], r4
+; BE-NEON-NEXT: vmov.32 d8[1], r1
+; BE-NEON-NEXT: vrev64.32 q8, q4
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT: vorr d0, d11, d11
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d10, d10
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d9[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vmov.32 d8[0], r0
+; BE-NEON-NEXT: add r0, sp, #464
+; BE-NEON-NEXT: vmov.32 d9[1], r4
+; BE-NEON-NEXT: vmov.32 d8[1], r1
+; BE-NEON-NEXT: vrev64.32 q8, q4
+; BE-NEON-NEXT: vld1.64 {d8, d9}, [r0]
+; BE-NEON-NEXT: vorr d0, d9, d9
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d8, d8
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: add r0, sp, #480
+; BE-NEON-NEXT: add r5, r6, #192
+; BE-NEON-NEXT: vmov.32 d11[1], r4
+; BE-NEON-NEXT: vmov.32 d10[1], r1
+; BE-NEON-NEXT: vld1.64 {d8, d9}, [r0]
+; BE-NEON-NEXT: vorr d0, d9, d9
+; BE-NEON-NEXT: vrev64.32 q8, q5
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d8, d8
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: add r0, sp, #496
+; BE-NEON-NEXT: vmov.32 d11[1], r4
+; BE-NEON-NEXT: vmov.32 d10[1], r1
+; BE-NEON-NEXT: vld1.64 {d8, d9}, [r0]
+; BE-NEON-NEXT: vorr d0, d9, d9
+; BE-NEON-NEXT: vrev64.32 q8, q5
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vorr d0, d8, d8
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: bl llrint
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: add lr, sp, #112
+; BE-NEON-NEXT: add r0, r6, #128
+; BE-NEON-NEXT: vmov.32 d11[1], r4
+; BE-NEON-NEXT: vmov.32 d10[1], r1
+; BE-NEON-NEXT: vrev64.32 q8, q5
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT: vst1.64 {d14, d15}, [r5:128]
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #200
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #216
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #96
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #80
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; BE-NEON-NEXT: add r0, r6, #64
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #64
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: add lr, sp, #48
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]
+; BE-NEON-NEXT: add sp, sp, #232
+; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: add sp, sp, #4
+; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double> %x)
+ ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double>)
+
+define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) {
+; LE-LABEL: llrint_v1i64_v1f128:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r11, lr}
+; LE-NEXT: push {r11, lr}
+; LE-NEXT: bl llrintl
+; LE-NEXT: vmov.32 d0[0], r0
+; LE-NEXT: vmov.32 d0[1], r1
+; LE-NEXT: pop {r11, pc}
+;
+; LE-NEON-LABEL: llrint_v1i64_v1f128:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r11, lr}
+; LE-NEON-NEXT: push {r11, lr}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: vmov.32 d0[0], r0
+; LE-NEON-NEXT: vmov.32 d0[1], r1
+; LE-NEON-NEXT: pop {r11, pc}
+;
+; BE-LABEL: llrint_v1i64_v1f128:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r11, lr}
+; BE-NEXT: push {r11, lr}
+; BE-NEXT: bl llrintl
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: vmov.32 d16[1], r1
+; BE-NEXT: vrev64.32 d0, d16
+; BE-NEXT: pop {r11, pc}
+;
+; BE-NEON-LABEL: llrint_v1i64_v1f128:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r11, lr}
+; BE-NEON-NEXT: push {r11, lr}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: vmov.32 d16[1], r1
+; BE-NEON-NEXT: vrev64.32 d0, d16
+; BE-NEON-NEXT: pop {r11, pc}
+ %a = call <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128> %x)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128>)
+
+define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) {
+; LE-LABEL: llrint_v2i64_v2f128:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; LE-NEXT: push {r4, r5, r6, r7, r8, lr}
+; LE-NEXT: .vsave {d8, d9}
+; LE-NEXT: vpush {d8, d9}
+; LE-NEXT: mov r8, r3
+; LE-NEXT: add r3, sp, #40
+; LE-NEXT: mov r5, r2
+; LE-NEXT: mov r6, r1
+; LE-NEXT: mov r7, r0
+; LE-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: mov r0, r7
+; LE-NEXT: mov r1, r6
+; LE-NEXT: mov r2, r5
+; LE-NEXT: mov r3, r8
+; LE-NEXT: bl llrintl
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: vmov.32 d9[1], r4
+; LE-NEXT: vmov.32 d8[1], r1
+; LE-NEXT: vorr q0, q4, q4
+; LE-NEXT: vpop {d8, d9}
+; LE-NEXT: pop {r4, r5, r6, r7, r8, pc}
+;
+; LE-NEON-LABEL: llrint_v2i64_v2f128:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, lr}
+; LE-NEON-NEXT: .vsave {d8, d9}
+; LE-NEON-NEXT: vpush {d8, d9}
+; LE-NEON-NEXT: mov r8, r3
+; LE-NEON-NEXT: add r3, sp, #40
+; LE-NEON-NEXT: mov r5, r2
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: mov r7, r0
+; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: mov r0, r7
+; LE-NEON-NEXT: mov r1, r6
+; LE-NEON-NEXT: mov r2, r5
+; LE-NEON-NEXT: mov r3, r8
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: vmov.32 d9[1], r4
+; LE-NEON-NEXT: vmov.32 d8[1], r1
+; LE-NEON-NEXT: vorr q0, q4, q4
+; LE-NEON-NEXT: vpop {d8, d9}
+; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc}
+;
+; BE-LABEL: llrint_v2i64_v2f128:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; BE-NEXT: push {r4, r5, r6, r7, r8, lr}
+; BE-NEXT: .vsave {d8}
+; BE-NEXT: vpush {d8}
+; BE-NEXT: mov r8, r3
+; BE-NEXT: add r3, sp, #32
+; BE-NEXT: mov r5, r2
+; BE-NEXT: mov r6, r1
+; BE-NEXT: mov r7, r0
+; BE-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d8[0], r0
+; BE-NEXT: mov r0, r7
+; BE-NEXT: mov r1, r6
+; BE-NEXT: mov r2, r5
+; BE-NEXT: mov r3, r8
+; BE-NEXT: bl llrintl
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: vmov.32 d8[1], r4
+; BE-NEXT: vmov.32 d16[1], r1
+; BE-NEXT: vrev64.32 d1, d8
+; BE-NEXT: vrev64.32 d0, d16
+; BE-NEXT: vpop {d8}
+; BE-NEXT: pop {r4, r5, r6, r7, r8, pc}
+;
+; BE-NEON-LABEL: llrint_v2i64_v2f128:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, lr}
+; BE-NEON-NEXT: .vsave {d8}
+; BE-NEON-NEXT: vpush {d8}
+; BE-NEON-NEXT: mov r8, r3
+; BE-NEON-NEXT: add r3, sp, #32
+; BE-NEON-NEXT: mov r5, r2
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: mov r7, r0
+; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d8[0], r0
+; BE-NEON-NEXT: mov r0, r7
+; BE-NEON-NEXT: mov r1, r6
+; BE-NEON-NEXT: mov r2, r5
+; BE-NEON-NEXT: mov r3, r8
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: vmov.32 d8[1], r4
+; BE-NEON-NEXT: vmov.32 d16[1], r1
+; BE-NEON-NEXT: vrev64.32 d1, d8
+; BE-NEON-NEXT: vrev64.32 d0, d16
+; BE-NEON-NEXT: vpop {d8}
+; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc}
+ %a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>)
+
+define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
+; LE-LABEL: llrint_v4i64_v4f128:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEXT: .vsave {d8, d9, d10, d11}
+; LE-NEXT: vpush {d8, d9, d10, d11}
+; LE-NEXT: mov r5, r3
+; LE-NEXT: add r3, sp, #96
+; LE-NEXT: mov r7, r2
+; LE-NEXT: mov r6, r1
+; LE-NEXT: mov r4, r0
+; LE-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: mov r9, r1
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: mov r0, r4
+; LE-NEXT: mov r1, r6
+; LE-NEXT: mov r2, r7
+; LE-NEXT: mov r3, r5
+; LE-NEXT: ldr r8, [sp, #80]
+; LE-NEXT: ldr r10, [sp, #64]
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #68
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: mov r0, r10
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #84
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: mov r0, r8
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: vmov.32 d11[1], r4
+; LE-NEXT: vmov.32 d9[1], r9
+; LE-NEXT: vmov.32 d10[1], r5
+; LE-NEXT: vmov.32 d8[1], r1
+; LE-NEXT: vorr q0, q5, q5
+; LE-NEXT: vorr q1, q4, q4
+; LE-NEXT: vpop {d8, d9, d10, d11}
+; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-NEON-LABEL: llrint_v4i64_v4f128:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEON-NEXT: .vsave {d8, d9, d10, d11}
+; LE-NEON-NEXT: vpush {d8, d9, d10, d11}
+; LE-NEON-NEXT: mov r5, r3
+; LE-NEON-NEXT: add r3, sp, #96
+; LE-NEON-NEXT: mov r7, r2
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: mov r4, r0
+; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: mov r9, r1
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: mov r0, r4
+; LE-NEON-NEXT: mov r1, r6
+; LE-NEON-NEXT: mov r2, r7
+; LE-NEON-NEXT: mov r3, r5
+; LE-NEON-NEXT: ldr r8, [sp, #80]
+; LE-NEON-NEXT: ldr r10, [sp, #64]
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #68
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: mov r0, r10
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #84
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: mov r0, r8
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: vmov.32 d11[1], r4
+; LE-NEON-NEXT: vmov.32 d9[1], r9
+; LE-NEON-NEXT: vmov.32 d10[1], r5
+; LE-NEON-NEXT: vmov.32 d8[1], r1
+; LE-NEON-NEXT: vorr q0, q5, q5
+; LE-NEON-NEXT: vorr q1, q4, q4
+; LE-NEON-NEXT: vpop {d8, d9, d10, d11}
+; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-LABEL: llrint_v4i64_v4f128:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEXT: .vsave {d8, d9, d10}
+; BE-NEXT: vpush {d8, d9, d10}
+; BE-NEXT: mov r5, r3
+; BE-NEXT: add r3, sp, #88
+; BE-NEXT: mov r7, r2
+; BE-NEXT: mov r6, r1
+; BE-NEXT: mov r4, r0
+; BE-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: mov r9, r1
+; BE-NEXT: vmov.32 d8[0], r0
+; BE-NEXT: mov r0, r4
+; BE-NEXT: mov r1, r6
+; BE-NEXT: mov r2, r7
+; BE-NEXT: mov r3, r5
+; BE-NEXT: ldr r8, [sp, #72]
+; BE-NEXT: ldr r10, [sp, #56]
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #60
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vmov.32 d9[0], r0
+; BE-NEXT: mov r0, r10
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #76
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: mov r0, r8
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: vmov.32 d10[1], r4
+; BE-NEXT: vmov.32 d8[1], r9
+; BE-NEXT: vmov.32 d9[1], r5
+; BE-NEXT: vmov.32 d16[1], r1
+; BE-NEXT: vrev64.32 d1, d10
+; BE-NEXT: vrev64.32 d3, d8
+; BE-NEXT: vrev64.32 d0, d9
+; BE-NEXT: vrev64.32 d2, d16
+; BE-NEXT: vpop {d8, d9, d10}
+; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-NEON-LABEL: llrint_v4i64_v4f128:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEON-NEXT: .vsave {d8, d9, d10}
+; BE-NEON-NEXT: vpush {d8, d9, d10}
+; BE-NEON-NEXT: mov r5, r3
+; BE-NEON-NEXT: add r3, sp, #88
+; BE-NEON-NEXT: mov r7, r2
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: mov r4, r0
+; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: mov r9, r1
+; BE-NEON-NEXT: vmov.32 d8[0], r0
+; BE-NEON-NEXT: mov r0, r4
+; BE-NEON-NEXT: mov r1, r6
+; BE-NEON-NEXT: mov r2, r7
+; BE-NEON-NEXT: mov r3, r5
+; BE-NEON-NEXT: ldr r8, [sp, #72]
+; BE-NEON-NEXT: ldr r10, [sp, #56]
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #60
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vmov.32 d9[0], r0
+; BE-NEON-NEXT: mov r0, r10
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #76
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: mov r0, r8
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: vmov.32 d10[1], r4
+; BE-NEON-NEXT: vmov.32 d8[1], r9
+; BE-NEON-NEXT: vmov.32 d9[1], r5
+; BE-NEON-NEXT: vmov.32 d16[1], r1
+; BE-NEON-NEXT: vrev64.32 d1, d10
+; BE-NEON-NEXT: vrev64.32 d3, d8
+; BE-NEON-NEXT: vrev64.32 d0, d9
+; BE-NEON-NEXT: vrev64.32 d2, d16
+; BE-NEON-NEXT: vpop {d8, d9, d10}
+; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+ %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>)
+
+define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
+; LE-LABEL: llrint_v8i64_v8f128:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: .pad #4
+; LE-NEXT: sub sp, sp, #4
+; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: .pad #8
+; LE-NEXT: sub sp, sp, #8
+; LE-NEXT: mov r11, r3
+; LE-NEXT: add r3, sp, #208
+; LE-NEXT: mov r10, r2
+; LE-NEXT: mov r4, r1
+; LE-NEXT: mov r5, r0
+; LE-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r7, sp, #164
+; LE-NEXT: ldr r6, [sp, #160]
+; LE-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: ldm r7, {r1, r2, r3, r7}
+; LE-NEXT: mov r0, r6
+; LE-NEXT: ldr r8, [sp, #128]
+; LE-NEXT: ldr r9, [sp, #144]
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #180
+; LE-NEXT: str r1, [sp] @ 4-byte Spill
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: mov r0, r7
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #132
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: mov r0, r8
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #148
+; LE-NEXT: mov r8, r1
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: mov r0, r9
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: mov r9, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: mov r0, r5
+; LE-NEXT: mov r1, r4
+; LE-NEXT: mov r2, r10
+; LE-NEXT: mov r3, r11
+; LE-NEXT: ldr r6, [sp, #112]
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #116
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: mov r0, r6
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #196
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: ldr r0, [sp, #192]
+; LE-NEXT: mov r5, r1
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: ldr r0, [sp] @ 4-byte Reload
+; LE-NEXT: vmov.32 d11[1], r7
+; LE-NEXT: vmov.32 d10[1], r0
+; LE-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; LE-NEXT: vmov.32 d15[1], r5
+; LE-NEXT: vorr q2, q5, q5
+; LE-NEXT: vmov.32 d13[1], r9
+; LE-NEXT: vmov.32 d9[1], r0
+; LE-NEXT: vmov.32 d14[1], r4
+; LE-NEXT: vmov.32 d12[1], r8
+; LE-NEXT: vorr q0, q7, q7
+; LE-NEXT: vmov.32 d8[1], r1
+; LE-NEXT: vorr q1, q6, q6
+; LE-NEXT: vorr q3, q4, q4
+; LE-NEXT: add sp, sp, #8
+; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: add sp, sp, #4
+; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v8i64_v8f128:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: .pad #4
+; LE-NEON-NEXT: sub sp, sp, #4
+; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: .pad #8
+; LE-NEON-NEXT: sub sp, sp, #8
+; LE-NEON-NEXT: mov r11, r3
+; LE-NEON-NEXT: add r3, sp, #208
+; LE-NEON-NEXT: mov r10, r2
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: mov r5, r0
+; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r7, sp, #164
+; LE-NEON-NEXT: ldr r6, [sp, #160]
+; LE-NEON-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: ldm r7, {r1, r2, r3, r7}
+; LE-NEON-NEXT: mov r0, r6
+; LE-NEON-NEXT: ldr r8, [sp, #128]
+; LE-NEON-NEXT: ldr r9, [sp, #144]
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #180
+; LE-NEON-NEXT: str r1, [sp] @ 4-byte Spill
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: mov r0, r7
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #132
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: mov r0, r8
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #148
+; LE-NEON-NEXT: mov r8, r1
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: mov r0, r9
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: mov r9, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: mov r0, r5
+; LE-NEON-NEXT: mov r1, r4
+; LE-NEON-NEXT: mov r2, r10
+; LE-NEON-NEXT: mov r3, r11
+; LE-NEON-NEXT: ldr r6, [sp, #112]
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #116
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: mov r0, r6
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #196
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #192]
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: ldr r0, [sp] @ 4-byte Reload
+; LE-NEON-NEXT: vmov.32 d11[1], r7
+; LE-NEON-NEXT: vmov.32 d10[1], r0
+; LE-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; LE-NEON-NEXT: vmov.32 d15[1], r5
+; LE-NEON-NEXT: vorr q2, q5, q5
+; LE-NEON-NEXT: vmov.32 d13[1], r9
+; LE-NEON-NEXT: vmov.32 d9[1], r0
+; LE-NEON-NEXT: vmov.32 d14[1], r4
+; LE-NEON-NEXT: vmov.32 d12[1], r8
+; LE-NEON-NEXT: vorr q0, q7, q7
+; LE-NEON-NEXT: vmov.32 d8[1], r1
+; LE-NEON-NEXT: vorr q1, q6, q6
+; LE-NEON-NEXT: vorr q3, q4, q4
+; LE-NEON-NEXT: add sp, sp, #8
+; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: add sp, sp, #4
+; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v8i64_v8f128:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: .pad #4
+; BE-NEXT: sub sp, sp, #4
+; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEXT: .pad #16
+; BE-NEXT: sub sp, sp, #16
+; BE-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; BE-NEXT: add r3, sp, #208
+; BE-NEXT: mov r11, r2
+; BE-NEXT: mov r4, r1
+; BE-NEXT: mov r5, r0
+; BE-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: ldr r7, [sp, #176]
+; BE-NEXT: add r3, sp, #180
+; BE-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; BE-NEXT: vmov.32 d8[0], r0
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: mov r0, r7
+; BE-NEXT: ldr r6, [sp, #128]
+; BE-NEXT: ldr r8, [sp, #144]
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #132
+; BE-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; BE-NEXT: vmov.32 d9[0], r0
+; BE-NEXT: mov r0, r6
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #148
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: mov r0, r8
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #160
+; BE-NEXT: mov r9, r0
+; BE-NEXT: mov r7, r1
+; BE-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
+; BE-NEXT: mov r8, r1
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: mov r0, r5
+; BE-NEXT: mov r1, r4
+; BE-NEXT: mov r2, r11
+; BE-NEXT: ldr r10, [sp, #112]
+; BE-NEXT: vmov.32 d12[0], r9
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #116
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: mov r0, r10
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #196
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: ldr r0, [sp, #192]
+; BE-NEXT: mov r5, r1
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; BE-NEXT: vmov.32 d14[1], r5
+; BE-NEXT: vmov.32 d9[1], r0
+; BE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; BE-NEXT: vmov.32 d12[1], r7
+; BE-NEXT: vmov.32 d8[1], r0
+; BE-NEXT: vmov.32 d13[1], r4
+; BE-NEXT: vmov.32 d10[1], r6
+; BE-NEXT: vmov.32 d11[1], r8
+; BE-NEXT: vmov.32 d16[1], r1
+; BE-NEXT: vrev64.32 d1, d14
+; BE-NEXT: vrev64.32 d3, d12
+; BE-NEXT: vrev64.32 d5, d9
+; BE-NEXT: vrev64.32 d7, d8
+; BE-NEXT: vrev64.32 d0, d13
+; BE-NEXT: vrev64.32 d2, d10
+; BE-NEXT: vrev64.32 d4, d11
+; BE-NEXT: vrev64.32 d6, d16
+; BE-NEXT: add sp, sp, #16
+; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEXT: add sp, sp, #4
+; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v8i64_v8f128:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: .pad #4
+; BE-NEON-NEXT: sub sp, sp, #4
+; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEON-NEXT: .pad #16
+; BE-NEON-NEXT: sub sp, sp, #16
+; BE-NEON-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; BE-NEON-NEXT: add r3, sp, #208
+; BE-NEON-NEXT: mov r11, r2
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: mov r5, r0
+; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: ldr r7, [sp, #176]
+; BE-NEON-NEXT: add r3, sp, #180
+; BE-NEON-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; BE-NEON-NEXT: vmov.32 d8[0], r0
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: mov r0, r7
+; BE-NEON-NEXT: ldr r6, [sp, #128]
+; BE-NEON-NEXT: ldr r8, [sp, #144]
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #132
+; BE-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; BE-NEON-NEXT: vmov.32 d9[0], r0
+; BE-NEON-NEXT: mov r0, r6
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #148
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: mov r0, r8
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #160
+; BE-NEON-NEXT: mov r9, r0
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
+; BE-NEON-NEXT: mov r8, r1
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: mov r0, r5
+; BE-NEON-NEXT: mov r1, r4
+; BE-NEON-NEXT: mov r2, r11
+; BE-NEON-NEXT: ldr r10, [sp, #112]
+; BE-NEON-NEXT: vmov.32 d12[0], r9
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #116
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: mov r0, r10
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #196
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #192]
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; BE-NEON-NEXT: vmov.32 d14[1], r5
+; BE-NEON-NEXT: vmov.32 d9[1], r0
+; BE-NEON-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; BE-NEON-NEXT: vmov.32 d12[1], r7
+; BE-NEON-NEXT: vmov.32 d8[1], r0
+; BE-NEON-NEXT: vmov.32 d13[1], r4
+; BE-NEON-NEXT: vmov.32 d10[1], r6
+; BE-NEON-NEXT: vmov.32 d11[1], r8
+; BE-NEON-NEXT: vmov.32 d16[1], r1
+; BE-NEON-NEXT: vrev64.32 d1, d14
+; BE-NEON-NEXT: vrev64.32 d3, d12
+; BE-NEON-NEXT: vrev64.32 d5, d9
+; BE-NEON-NEXT: vrev64.32 d7, d8
+; BE-NEON-NEXT: vrev64.32 d0, d13
+; BE-NEON-NEXT: vrev64.32 d2, d10
+; BE-NEON-NEXT: vrev64.32 d4, d11
+; BE-NEON-NEXT: vrev64.32 d6, d16
+; BE-NEON-NEXT: add sp, sp, #16
+; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEON-NEXT: add sp, sp, #4
+; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128>)
+
+define <16 x i64> @llrint_v16f128(<16 x fp128> %x) {
+; LE-LABEL: llrint_v16f128:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: .pad #4
+; LE-NEXT: sub sp, sp, #4
+; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: .pad #72
+; LE-NEXT: sub sp, sp, #72
+; LE-NEXT: mov r6, r3
+; LE-NEXT: add r3, sp, #408
+; LE-NEXT: mov r7, r2
+; LE-NEXT: mov r4, r0
+; LE-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r5, sp, #176
+; LE-NEXT: mov r10, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: mov r0, r7
+; LE-NEXT: ldm r5, {r2, r3, r5}
+; LE-NEXT: mov r1, r6
+; LE-NEXT: ldr r8, [sp, #232]
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #188
+; LE-NEXT: mov r9, r1
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: mov r0, r5
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #236
+; LE-NEXT: mov r11, r1
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: mov r0, r8
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #252
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: ldr r0, [sp, #248]
+; LE-NEXT: mov r8, r1
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #268
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: ldr r0, [sp, #264]
+; LE-NEXT: mov r6, r1
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #284
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: ldr r0, [sp, #280]
+; LE-NEXT: mov r7, r1
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #316
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: ldr r0, [sp, #312]
+; LE-NEXT: mov r5, r1
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: vmov.32 d15[1], r5
+; LE-NEXT: add lr, sp, #56
+; LE-NEXT: ldr r5, [sp, #300]
+; LE-NEXT: vmov.32 d14[1], r7
+; LE-NEXT: ldr r2, [sp, #304]
+; LE-NEXT: ldr r3, [sp, #308]
+; LE-NEXT: vmov.32 d11[1], r6
+; LE-NEXT: ldr r6, [sp, #200]
+; LE-NEXT: ldr r7, [sp, #204]
+; LE-NEXT: vmov.32 d10[1], r8
+; LE-NEXT: ldr r8, [sp, #344]
+; LE-NEXT: vmov.32 d9[1], r11
+; LE-NEXT: ldr r11, [sp, #216]
+; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #40
+; LE-NEXT: vmov.32 d17[0], r0
+; LE-NEXT: ldr r0, [sp, #296]
+; LE-NEXT: vmov.32 d8[1], r9
+; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #24
+; LE-NEXT: vorr q5, q8, q8
+; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT: vorr q4, q6, q6
+; LE-NEXT: vmov.32 d11[1], r1
+; LE-NEXT: mov r1, r5
+; LE-NEXT: vmov.32 d9[1], r10
+; LE-NEXT: bl llrintl
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: ldr r2, [sp, #208]
+; LE-NEXT: ldr r3, [sp, #212]
+; LE-NEXT: add lr, sp, #8
+; LE-NEXT: mov r9, r1
+; LE-NEXT: mov r0, r6
+; LE-NEXT: mov r1, r7
+; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #220
+; LE-NEXT: mov r10, r1
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: mov r0, r11
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #348
+; LE-NEXT: mov r11, r1
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: mov r0, r8
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #364
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: ldr r0, [sp, #360]
+; LE-NEXT: mov r8, r1
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #380
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: ldr r0, [sp, #376]
+; LE-NEXT: mov r5, r1
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #396
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: ldr r0, [sp, #392]
+; LE-NEXT: mov r6, r1
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #332
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: ldr r0, [sp, #328]
+; LE-NEXT: mov r7, r1
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add lr, sp, #8
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: add r0, r4, #64
+; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #24
+; LE-NEXT: vmov.32 d13[1], r8
+; LE-NEXT: vmov.32 d18[1], r9
+; LE-NEXT: vmov.32 d15[1], r6
+; LE-NEXT: vmov.32 d12[1], r1
+; LE-NEXT: vmov.32 d14[1], r5
+; LE-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-NEXT: vmov.32 d8[1], r7
+; LE-NEXT: vst1.64 {d14, d15}, [r0:128]!
+; LE-NEXT: vst1.64 {d8, d9}, [r0:128]
+; LE-NEXT: vmov.32 d11[1], r11
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #40
+; LE-NEXT: vmov.32 d10[1], r10
+; LE-NEXT: vst1.64 {d16, d17}, [r4:128]!
+; LE-NEXT: vst1.64 {d10, d11}, [r4:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #56
+; LE-NEXT: vst1.64 {d16, d17}, [r4:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vst1.64 {d16, d17}, [r4:128]
+; LE-NEXT: add sp, sp, #72
+; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: add sp, sp, #4
+; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v16f128:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: .pad #4
+; LE-NEON-NEXT: sub sp, sp, #4
+; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: .pad #72
+; LE-NEON-NEXT: sub sp, sp, #72
+; LE-NEON-NEXT: mov r6, r3
+; LE-NEON-NEXT: add r3, sp, #408
+; LE-NEON-NEXT: mov r7, r2
+; LE-NEON-NEXT: mov r4, r0
+; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r5, sp, #176
+; LE-NEON-NEXT: mov r10, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: mov r0, r7
+; LE-NEON-NEXT: ldm r5, {r2, r3, r5}
+; LE-NEON-NEXT: mov r1, r6
+; LE-NEON-NEXT: ldr r8, [sp, #232]
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #188
+; LE-NEON-NEXT: mov r9, r1
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: mov r0, r5
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #236
+; LE-NEON-NEXT: mov r11, r1
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: mov r0, r8
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #252
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #248]
+; LE-NEON-NEXT: mov r8, r1
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #268
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #264]
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #284
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #280]
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #316
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #312]
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: vmov.32 d15[1], r5
+; LE-NEON-NEXT: add lr, sp, #56
+; LE-NEON-NEXT: ldr r5, [sp, #300]
+; LE-NEON-NEXT: vmov.32 d14[1], r7
+; LE-NEON-NEXT: ldr r2, [sp, #304]
+; LE-NEON-NEXT: ldr r3, [sp, #308]
+; LE-NEON-NEXT: vmov.32 d11[1], r6
+; LE-NEON-NEXT: ldr r6, [sp, #200]
+; LE-NEON-NEXT: ldr r7, [sp, #204]
+; LE-NEON-NEXT: vmov.32 d10[1], r8
+; LE-NEON-NEXT: ldr r8, [sp, #344]
+; LE-NEON-NEXT: vmov.32 d9[1], r11
+; LE-NEON-NEXT: ldr r11, [sp, #216]
+; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #40
+; LE-NEON-NEXT: vmov.32 d17[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #296]
+; LE-NEON-NEXT: vmov.32 d8[1], r9
+; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #24
+; LE-NEON-NEXT: vorr q5, q8, q8
+; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT: vorr q4, q6, q6
+; LE-NEON-NEXT: vmov.32 d11[1], r1
+; LE-NEON-NEXT: mov r1, r5
+; LE-NEON-NEXT: vmov.32 d9[1], r10
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: ldr r2, [sp, #208]
+; LE-NEON-NEXT: ldr r3, [sp, #212]
+; LE-NEON-NEXT: add lr, sp, #8
+; LE-NEON-NEXT: mov r9, r1
+; LE-NEON-NEXT: mov r0, r6
+; LE-NEON-NEXT: mov r1, r7
+; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #220
+; LE-NEON-NEXT: mov r10, r1
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: mov r0, r11
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #348
+; LE-NEON-NEXT: mov r11, r1
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: mov r0, r8
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #364
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #360]
+; LE-NEON-NEXT: mov r8, r1
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #380
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #376]
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #396
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #392]
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #332
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #328]
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add lr, sp, #8
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: add r0, r4, #64
+; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #24
+; LE-NEON-NEXT: vmov.32 d13[1], r8
+; LE-NEON-NEXT: vmov.32 d18[1], r9
+; LE-NEON-NEXT: vmov.32 d15[1], r6
+; LE-NEON-NEXT: vmov.32 d12[1], r1
+; LE-NEON-NEXT: vmov.32 d14[1], r5
+; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-NEON-NEXT: vmov.32 d8[1], r7
+; LE-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]!
+; LE-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]
+; LE-NEON-NEXT: vmov.32 d11[1], r11
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #40
+; LE-NEON-NEXT: vmov.32 d10[1], r10
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]!
+; LE-NEON-NEXT: vst1.64 {d10, d11}, [r4:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #56
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]
+; LE-NEON-NEXT: add sp, sp, #72
+; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: add sp, sp, #4
+; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v16f128:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: .pad #4
+; BE-NEXT: sub sp, sp, #4
+; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: .pad #56
+; BE-NEXT: sub sp, sp, #56
+; BE-NEXT: mov r5, r3
+; BE-NEXT: add r3, sp, #376
+; BE-NEXT: mov r6, r2
+; BE-NEXT: mov r4, r0
+; BE-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: ldr r7, [sp, #392]
+; BE-NEXT: add r3, sp, #396
+; BE-NEXT: mov r9, r1
+; BE-NEXT: vmov.32 d8[0], r0
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: mov r0, r7
+; BE-NEXT: ldr r11, [sp, #168]
+; BE-NEXT: bl llrintl
+; BE-NEXT: ldr r2, [sp, #160]
+; BE-NEXT: mov r10, r1
+; BE-NEXT: ldr r3, [sp, #164]
+; BE-NEXT: vmov.32 d9[0], r0
+; BE-NEXT: mov r0, r6
+; BE-NEXT: mov r1, r5
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #172
+; BE-NEXT: mov r8, r1
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: mov r0, r11
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #220
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: ldr r0, [sp, #216]
+; BE-NEXT: mov r11, r1
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #236
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: ldr r0, [sp, #232]
+; BE-NEXT: mov r6, r1
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #252
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: ldr r0, [sp, #248]
+; BE-NEXT: mov r7, r1
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #268
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: ldr r0, [sp, #264]
+; BE-NEXT: mov r5, r1
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: ldr r0, [sp, #280]
+; BE-NEXT: ldr r2, [sp, #288]
+; BE-NEXT: vmov.32 d13[1], r7
+; BE-NEXT: ldr r7, [sp, #284]
+; BE-NEXT: ldr r3, [sp, #292]
+; BE-NEXT: vmov.32 d14[1], r5
+; BE-NEXT: ldr r5, [sp, #328]
+; BE-NEXT: vmov.32 d12[1], r6
+; BE-NEXT: ldr r6, [sp, #300]
+; BE-NEXT: vmov.32 d10[1], r8
+; BE-NEXT: ldr r8, [sp, #184]
+; BE-NEXT: vmov.32 d11[1], r11
+; BE-NEXT: vmov.32 d9[1], r10
+; BE-NEXT: vmov.32 d8[1], r9
+; BE-NEXT: vmov.32 d15[1], r1
+; BE-NEXT: mov r1, r7
+; BE-NEXT: vstr d14, [sp, #48] @ 8-byte Spill
+; BE-NEXT: vstr d13, [sp, #40] @ 8-byte Spill
+; BE-NEXT: vstr d12, [sp, #32] @ 8-byte Spill
+; BE-NEXT: vstr d11, [sp, #24] @ 8-byte Spill
+; BE-NEXT: vstr d10, [sp, #16] @ 8-byte Spill
+; BE-NEXT: vstr d9, [sp, #8] @ 8-byte Spill
+; BE-NEXT: vstr d8, [sp] @ 8-byte Spill
+; BE-NEXT: bl llrintl
+; BE-NEXT: mov r10, r1
+; BE-NEXT: ldr r1, [sp, #296]
+; BE-NEXT: ldr r2, [sp, #304]
+; BE-NEXT: vmov.32 d8[0], r0
+; BE-NEXT: ldr r3, [sp, #308]
+; BE-NEXT: mov r0, r1
+; BE-NEXT: mov r1, r6
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #332
+; BE-NEXT: mov r11, r1
+; BE-NEXT: vmov.32 d9[0], r0
+; BE-NEXT: mov r0, r5
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #188
+; BE-NEXT: mov r7, r1
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: mov r0, r8
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #204
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: ldr r0, [sp, #200]
+; BE-NEXT: mov r8, r1
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #348
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: ldr r0, [sp, #344]
+; BE-NEXT: mov r5, r1
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #364
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: ldr r0, [sp, #360]
+; BE-NEXT: mov r9, r1
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #316
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: ldr r0, [sp, #312]
+; BE-NEXT: mov r6, r1
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: vldr d18, [sp, #48] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d17, d15
+; BE-NEXT: vrev64.32 d16, d18
+; BE-NEXT: vldr d18, [sp, #40] @ 8-byte Reload
+; BE-NEXT: vmov.32 d24[0], r0
+; BE-NEXT: add r0, r4, #64
+; BE-NEXT: vldr d20, [sp, #32] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d19, d18
+; BE-NEXT: vmov.32 d9[1], r11
+; BE-NEXT: vmov.32 d10[1], r7
+; BE-NEXT: vrev64.32 d18, d20
+; BE-NEXT: vldr d20, [sp, #24] @ 8-byte Reload
+; BE-NEXT: vmov.32 d8[1], r10
+; BE-NEXT: vmov.32 d14[1], r6
+; BE-NEXT: vmov.32 d24[1], r1
+; BE-NEXT: vldr d22, [sp, #16] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d21, d20
+; BE-NEXT: vrev64.32 d1, d9
+; BE-NEXT: vmov.32 d13[1], r9
+; BE-NEXT: vrev64.32 d31, d10
+; BE-NEXT: vrev64.32 d20, d22
+; BE-NEXT: vldr d22, [sp, #8] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d0, d8
+; BE-NEXT: vrev64.32 d29, d14
+; BE-NEXT: vmov.32 d12[1], r5
+; BE-NEXT: vrev64.32 d30, d24
+; BE-NEXT: vrev64.32 d27, d22
+; BE-NEXT: vldr d22, [sp] @ 8-byte Reload
+; BE-NEXT: vst1.64 {d0, d1}, [r0:128]!
+; BE-NEXT: vmov.32 d11[1], r8
+; BE-NEXT: vrev64.32 d28, d13
+; BE-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-NEXT: vrev64.32 d26, d22
+; BE-NEXT: vrev64.32 d23, d12
+; BE-NEXT: vst1.64 {d28, d29}, [r0:128]!
+; BE-NEXT: vrev64.32 d22, d11
+; BE-NEXT: vst1.64 {d26, d27}, [r0:128]
+; BE-NEXT: vst1.64 {d20, d21}, [r4:128]!
+; BE-NEXT: vst1.64 {d22, d23}, [r4:128]!
+; BE-NEXT: vst1.64 {d18, d19}, [r4:128]!
+; BE-NEXT: vst1.64 {d16, d17}, [r4:128]
+; BE-NEXT: add sp, sp, #56
+; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: add sp, sp, #4
+; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v16f128:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: .pad #4
+; BE-NEON-NEXT: sub sp, sp, #4
+; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: .pad #56
+; BE-NEON-NEXT: sub sp, sp, #56
+; BE-NEON-NEXT: mov r5, r3
+; BE-NEON-NEXT: add r3, sp, #376
+; BE-NEON-NEXT: mov r6, r2
+; BE-NEON-NEXT: mov r4, r0
+; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: ldr r7, [sp, #392]
+; BE-NEON-NEXT: add r3, sp, #396
+; BE-NEON-NEXT: mov r9, r1
+; BE-NEON-NEXT: vmov.32 d8[0], r0
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: mov r0, r7
+; BE-NEON-NEXT: ldr r11, [sp, #168]
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: ldr r2, [sp, #160]
+; BE-NEON-NEXT: mov r10, r1
+; BE-NEON-NEXT: ldr r3, [sp, #164]
+; BE-NEON-NEXT: vmov.32 d9[0], r0
+; BE-NEON-NEXT: mov r0, r6
+; BE-NEON-NEXT: mov r1, r5
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #172
+; BE-NEON-NEXT: mov r8, r1
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: mov r0, r11
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #220
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #216]
+; BE-NEON-NEXT: mov r11, r1
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #236
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #232]
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #252
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #248]
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #268
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #264]
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #280]
+; BE-NEON-NEXT: ldr r2, [sp, #288]
+; BE-NEON-NEXT: vmov.32 d13[1], r7
+; BE-NEON-NEXT: ldr r7, [sp, #284]
+; BE-NEON-NEXT: ldr r3, [sp, #292]
+; BE-NEON-NEXT: vmov.32 d14[1], r5
+; BE-NEON-NEXT: ldr r5, [sp, #328]
+; BE-NEON-NEXT: vmov.32 d12[1], r6
+; BE-NEON-NEXT: ldr r6, [sp, #300]
+; BE-NEON-NEXT: vmov.32 d10[1], r8
+; BE-NEON-NEXT: ldr r8, [sp, #184]
+; BE-NEON-NEXT: vmov.32 d11[1], r11
+; BE-NEON-NEXT: vmov.32 d9[1], r10
+; BE-NEON-NEXT: vmov.32 d8[1], r9
+; BE-NEON-NEXT: vmov.32 d15[1], r1
+; BE-NEON-NEXT: mov r1, r7
+; BE-NEON-NEXT: vstr d14, [sp, #48] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d13, [sp, #40] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d12, [sp, #32] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d11, [sp, #24] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d10, [sp, #16] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d9, [sp, #8] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d8, [sp] @ 8-byte Spill
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: mov r10, r1
+; BE-NEON-NEXT: ldr r1, [sp, #296]
+; BE-NEON-NEXT: ldr r2, [sp, #304]
+; BE-NEON-NEXT: vmov.32 d8[0], r0
+; BE-NEON-NEXT: ldr r3, [sp, #308]
+; BE-NEON-NEXT: mov r0, r1
+; BE-NEON-NEXT: mov r1, r6
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #332
+; BE-NEON-NEXT: mov r11, r1
+; BE-NEON-NEXT: vmov.32 d9[0], r0
+; BE-NEON-NEXT: mov r0, r5
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #188
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: mov r0, r8
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #204
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #200]
+; BE-NEON-NEXT: mov r8, r1
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #348
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #344]
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #364
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #360]
+; BE-NEON-NEXT: mov r9, r1
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #316
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #312]
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: vldr d18, [sp, #48] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d17, d15
+; BE-NEON-NEXT: vrev64.32 d16, d18
+; BE-NEON-NEXT: vldr d18, [sp, #40] @ 8-byte Reload
+; BE-NEON-NEXT: vmov.32 d24[0], r0
+; BE-NEON-NEXT: add r0, r4, #64
+; BE-NEON-NEXT: vldr d20, [sp, #32] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d19, d18
+; BE-NEON-NEXT: vmov.32 d9[1], r11
+; BE-NEON-NEXT: vmov.32 d10[1], r7
+; BE-NEON-NEXT: vrev64.32 d18, d20
+; BE-NEON-NEXT: vldr d20, [sp, #24] @ 8-byte Reload
+; BE-NEON-NEXT: vmov.32 d8[1], r10
+; BE-NEON-NEXT: vmov.32 d14[1], r6
+; BE-NEON-NEXT: vmov.32 d24[1], r1
+; BE-NEON-NEXT: vldr d22, [sp, #16] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d21, d20
+; BE-NEON-NEXT: vrev64.32 d1, d9
+; BE-NEON-NEXT: vmov.32 d13[1], r9
+; BE-NEON-NEXT: vrev64.32 d31, d10
+; BE-NEON-NEXT: vrev64.32 d20, d22
+; BE-NEON-NEXT: vldr d22, [sp, #8] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d0, d8
+; BE-NEON-NEXT: vrev64.32 d29, d14
+; BE-NEON-NEXT: vmov.32 d12[1], r5
+; BE-NEON-NEXT: vrev64.32 d30, d24
+; BE-NEON-NEXT: vrev64.32 d27, d22
+; BE-NEON-NEXT: vldr d22, [sp] @ 8-byte Reload
+; BE-NEON-NEXT: vst1.64 {d0, d1}, [r0:128]!
+; BE-NEON-NEXT: vmov.32 d11[1], r8
+; BE-NEON-NEXT: vrev64.32 d28, d13
+; BE-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-NEON-NEXT: vrev64.32 d26, d22
+; BE-NEON-NEXT: vrev64.32 d23, d12
+; BE-NEON-NEXT: vst1.64 {d28, d29}, [r0:128]!
+; BE-NEON-NEXT: vrev64.32 d22, d11
+; BE-NEON-NEXT: vst1.64 {d26, d27}, [r0:128]
+; BE-NEON-NEXT: vst1.64 {d20, d21}, [r4:128]!
+; BE-NEON-NEXT: vst1.64 {d22, d23}, [r4:128]!
+; BE-NEON-NEXT: vst1.64 {d18, d19}, [r4:128]!
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]
+; BE-NEON-NEXT: add sp, sp, #56
+; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: add sp, sp, #4
+; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <16 x i64> @llvm.llrint.v16i64.v16f128(<16 x fp128> %x)
+ ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.llrint.v16i64.v16f128(<16 x fp128>)
+
+define <32 x i64> @llrint_v32f128(<32 x fp128> %x) {
+; LE-LABEL: llrint_v32f128:
+; LE: @ %bb.0:
+; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT: .pad #4
+; LE-NEXT: sub sp, sp, #4
+; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: .pad #192
+; LE-NEXT: sub sp, sp, #192
+; LE-NEXT: str r3, [sp, #60] @ 4-byte Spill
+; LE-NEXT: add r3, sp, #688
+; LE-NEXT: str r2, [sp, #56] @ 4-byte Spill
+; LE-NEXT: mov r9, r0
+; LE-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #560
+; LE-NEXT: mov r4, r0
+; LE-NEXT: str r1, [sp, #64] @ 4-byte Spill
+; LE-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: vmov.32 d17[0], r0
+; LE-NEXT: ldr r7, [sp, #544]
+; LE-NEXT: ldr r6, [sp, #548]
+; LE-NEXT: add lr, sp, #96
+; LE-NEXT: ldr r2, [sp, #552]
+; LE-NEXT: vmov.32 d17[1], r1
+; LE-NEXT: ldr r3, [sp, #556]
+; LE-NEXT: mov r0, r7
+; LE-NEXT: mov r1, r6
+; LE-NEXT: vorr q4, q8, q8
+; LE-NEXT: ldr r5, [sp, #528]
+; LE-NEXT: vmov.32 d17[0], r4
+; LE-NEXT: ldr r10, [sp, #304]
+; LE-NEXT: ldr r8, [sp, #368]
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #532
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: mov r11, r1
+; LE-NEXT: add lr, sp, #144
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: mov r0, r5
+; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #308
+; LE-NEXT: mov r5, r1
+; LE-NEXT: add lr, sp, #176
+; LE-NEXT: vmov.32 d17[0], r0
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: mov r0, r10
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #372
+; LE-NEXT: mov r10, r1
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: mov r0, r8
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #404
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: ldr r0, [sp, #400]
+; LE-NEXT: mov r6, r1
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #596
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: ldr r0, [sp, #592]
+; LE-NEXT: mov r7, r1
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #676
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: ldr r0, [sp, #672]
+; LE-NEXT: mov r4, r1
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add lr, sp, #96
+; LE-NEXT: vmov.32 d13[1], r4
+; LE-NEXT: str r1, [sp, #52] @ 4-byte Spill
+; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #80
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #128
+; LE-NEXT: vmov.32 d9[1], r7
+; LE-NEXT: ldr r1, [sp, #628]
+; LE-NEXT: ldr r2, [sp, #632]
+; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #112
+; LE-NEXT: vmov.32 d15[1], r6
+; LE-NEXT: ldr r3, [sp, #636]
+; LE-NEXT: ldr r7, [sp, #64] @ 4-byte Reload
+; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #160
+; LE-NEXT: vmov.32 d11[1], r10
+; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #176
+; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #144
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vmov.32 d18[0], r0
+; LE-NEXT: ldr r0, [sp, #624]
+; LE-NEXT: vmov.32 d16[1], r11
+; LE-NEXT: vmov.32 d9[1], r5
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #96
+; LE-NEXT: vmov.32 d19[1], r7
+; LE-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #512
+; LE-NEXT: str r0, [sp, #48] @ 4-byte Spill
+; LE-NEXT: str r1, [sp, #64] @ 4-byte Spill
+; LE-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #768
+; LE-NEXT: mov r11, r0
+; LE-NEXT: str r1, [sp, #28] @ 4-byte Spill
+; LE-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: ldr r6, [sp, #784]
+; LE-NEXT: add r3, sp, #788
+; LE-NEXT: mov r8, r1
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: mov r0, r6
+; LE-NEXT: ldr r5, [sp, #736]
+; LE-NEXT: ldr r7, [sp, #752]
+; LE-NEXT: ldr r4, [sp, #720]
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #740
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: mov r0, r5
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #756
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: mov r0, r7
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #724
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: mov r0, r4
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: vmov.32 d13[1], r7
+; LE-NEXT: add lr, sp, #8
+; LE-NEXT: ldr r2, [sp, #296]
+; LE-NEXT: vmov.32 d12[1], r5
+; LE-NEXT: ldr r3, [sp, #300]
+; LE-NEXT: ldr r4, [sp, #576]
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; LE-NEXT: ldr r10, [sp, #384]
+; LE-NEXT: vmov.32 d15[1], r6
+; LE-NEXT: ldr r6, [sp, #352]
+; LE-NEXT: vmov.32 d14[1], r8
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #32
+; LE-NEXT: vmov.32 d11[1], r1
+; LE-NEXT: ldr r1, [sp, #60] @ 4-byte Reload
+; LE-NEXT: vmov.32 d8[0], r11
+; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #176
+; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT: bl llrintl
+; LE-NEXT: add lr, sp, #160
+; LE-NEXT: add r3, sp, #356
+; LE-NEXT: mov r5, r1
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: vmov.32 d16[0], r0
+; LE-NEXT: mov r0, r6
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: bl llrintl
+; LE-NEXT: add lr, sp, #112
+; LE-NEXT: add r3, sp, #388
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: mov r0, r10
+; LE-NEXT: bl llrintl
+; LE-NEXT: add lr, sp, #128
+; LE-NEXT: add r3, sp, #580
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: mov r0, r4
+; LE-NEXT: bl llrintl
+; LE-NEXT: add lr, sp, #80
+; LE-NEXT: add r3, sp, #708
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: ldr r0, [sp, #704]
+; LE-NEXT: bl llrintl
+; LE-NEXT: vmov.32 d8[1], r4
+; LE-NEXT: add lr, sp, #80
+; LE-NEXT: ldr r2, [sp, #52] @ 4-byte Reload
+; LE-NEXT: vmov.32 d12[1], r6
+; LE-NEXT: ldr r6, [sp, #644]
+; LE-NEXT: ldr r3, [sp, #652]
+; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #128
+; LE-NEXT: vmov.32 d14[1], r7
+; LE-NEXT: ldr r4, [sp, #480]
+; LE-NEXT: ldr r7, [sp, #656]
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #112
+; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #160
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
+; LE-NEXT: ldr r10, [sp, #496]
+; LE-NEXT: vmov.32 d16[1], r5
+; LE-NEXT: add r5, r9, #192
+; LE-NEXT: ldr r8, [sp, #608]
+; LE-NEXT: vmov.32 d10[1], r1
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #176
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vmov.32 d16[1], r0
+; LE-NEXT: ldr r0, [sp, #640]
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #96
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #8
+; LE-NEXT: vmov.32 d16[1], r2
+; LE-NEXT: ldr r2, [sp, #648]
+; LE-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; LE-NEXT: vst1.64 {d10, d11}, [r5:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; LE-NEXT: ldr r1, [sp, #48] @ 4-byte Reload
+; LE-NEXT: vmov.32 d9[0], r1
+; LE-NEXT: mov r1, r6
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #660
+; LE-NEXT: mov r11, r1
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: mov r0, r7
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #484
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: mov r0, r4
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #500
+; LE-NEXT: mov r6, r1
+; LE-NEXT: vmov.32 d10[0], r0
+; LE-NEXT: mov r0, r10
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #612
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d11[0], r0
+; LE-NEXT: mov r0, r8
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: ldr r0, [sp, #64] @ 4-byte Reload
+; LE-NEXT: add lr, sp, #96
+; LE-NEXT: add r8, r9, #128
+; LE-NEXT: vmov.32 d13[1], r7
+; LE-NEXT: ldr r2, [sp, #344]
+; LE-NEXT: ldr r3, [sp, #348]
+; LE-NEXT: vmov.32 d12[1], r11
+; LE-NEXT: ldr r7, [sp, #452]
+; LE-NEXT: ldr r10, [sp, #416]
+; LE-NEXT: vmov.32 d9[1], r0
+; LE-NEXT: ldr r0, [sp, #336]
+; LE-NEXT: vmov.32 d8[1], r1
+; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #64
+; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT: add lr, sp, #32
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #144
+; LE-NEXT: vmov.32 d11[1], r4
+; LE-NEXT: ldr r4, [sp, #340]
+; LE-NEXT: vst1.64 {d16, d17}, [r5:128]
+; LE-NEXT: mov r1, r4
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #80
+; LE-NEXT: vmov.32 d10[1], r6
+; LE-NEXT: ldr r6, [sp, #448]
+; LE-NEXT: vst1.64 {d16, d17}, [r8:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vst1.64 {d16, d17}, [r8:128]!
+; LE-NEXT: bl llrintl
+; LE-NEXT: ldr r2, [sp, #456]
+; LE-NEXT: mov r11, r1
+; LE-NEXT: ldr r3, [sp, #460]
+; LE-NEXT: vmov.32 d15[0], r0
+; LE-NEXT: mov r0, r6
+; LE-NEXT: mov r1, r7
+; LE-NEXT: ldr r5, [sp, #432]
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #468
+; LE-NEXT: vmov.32 d12[0], r0
+; LE-NEXT: ldr r0, [sp, #464]
+; LE-NEXT: mov r6, r1
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #420
+; LE-NEXT: mov r7, r1
+; LE-NEXT: vmov.32 d13[0], r0
+; LE-NEXT: mov r0, r10
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #436
+; LE-NEXT: mov r4, r1
+; LE-NEXT: vmov.32 d8[0], r0
+; LE-NEXT: mov r0, r5
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add r3, sp, #324
+; LE-NEXT: vmov.32 d9[0], r0
+; LE-NEXT: ldr r0, [sp, #320]
+; LE-NEXT: mov r5, r1
+; LE-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEXT: bl llrintl
+; LE-NEXT: add lr, sp, #64
+; LE-NEXT: vmov.32 d9[1], r5
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #96
+; LE-NEXT: vmov.32 d13[1], r7
+; LE-NEXT: vst1.64 {d16, d17}, [r8:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #176
+; LE-NEXT: vmov.32 d8[1], r4
+; LE-NEXT: vmov.32 d12[1], r6
+; LE-NEXT: vmov.32 d14[0], r0
+; LE-NEXT: add r0, r9, #64
+; LE-NEXT: vst1.64 {d16, d17}, [r8:128]
+; LE-NEXT: vst1.64 {d8, d9}, [r0:128]!
+; LE-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #160
+; LE-NEXT: vmov.32 d15[1], r11
+; LE-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #112
+; LE-NEXT: vmov.32 d14[1], r1
+; LE-NEXT: vst1.64 {d16, d17}, [r9:128]!
+; LE-NEXT: vst1.64 {d14, d15}, [r9:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: add lr, sp, #128
+; LE-NEXT: vst1.64 {d16, d17}, [r9:128]!
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT: vst1.64 {d16, d17}, [r9:128]
+; LE-NEXT: add sp, sp, #192
+; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT: add sp, sp, #4
+; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v32f128:
+; LE-NEON: @ %bb.0:
+; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT: .pad #4
+; LE-NEON-NEXT: sub sp, sp, #4
+; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: .pad #192
+; LE-NEON-NEXT: sub sp, sp, #192
+; LE-NEON-NEXT: str r3, [sp, #60] @ 4-byte Spill
+; LE-NEON-NEXT: add r3, sp, #688
+; LE-NEON-NEXT: str r2, [sp, #56] @ 4-byte Spill
+; LE-NEON-NEXT: mov r9, r0
+; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #560
+; LE-NEON-NEXT: mov r4, r0
+; LE-NEON-NEXT: str r1, [sp, #64] @ 4-byte Spill
+; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: vmov.32 d17[0], r0
+; LE-NEON-NEXT: ldr r7, [sp, #544]
+; LE-NEON-NEXT: ldr r6, [sp, #548]
+; LE-NEON-NEXT: add lr, sp, #96
+; LE-NEON-NEXT: ldr r2, [sp, #552]
+; LE-NEON-NEXT: vmov.32 d17[1], r1
+; LE-NEON-NEXT: ldr r3, [sp, #556]
+; LE-NEON-NEXT: mov r0, r7
+; LE-NEON-NEXT: mov r1, r6
+; LE-NEON-NEXT: vorr q4, q8, q8
+; LE-NEON-NEXT: ldr r5, [sp, #528]
+; LE-NEON-NEXT: vmov.32 d17[0], r4
+; LE-NEON-NEXT: ldr r10, [sp, #304]
+; LE-NEON-NEXT: ldr r8, [sp, #368]
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #532
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: mov r11, r1
+; LE-NEON-NEXT: add lr, sp, #144
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: mov r0, r5
+; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #308
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: add lr, sp, #176
+; LE-NEON-NEXT: vmov.32 d17[0], r0
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: mov r0, r10
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #372
+; LE-NEON-NEXT: mov r10, r1
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: mov r0, r8
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #404
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #400]
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #596
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #592]
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #676
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #672]
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add lr, sp, #96
+; LE-NEON-NEXT: vmov.32 d13[1], r4
+; LE-NEON-NEXT: str r1, [sp, #52] @ 4-byte Spill
+; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #80
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #128
+; LE-NEON-NEXT: vmov.32 d9[1], r7
+; LE-NEON-NEXT: ldr r1, [sp, #628]
+; LE-NEON-NEXT: ldr r2, [sp, #632]
+; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #112
+; LE-NEON-NEXT: vmov.32 d15[1], r6
+; LE-NEON-NEXT: ldr r3, [sp, #636]
+; LE-NEON-NEXT: ldr r7, [sp, #64] @ 4-byte Reload
+; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #160
+; LE-NEON-NEXT: vmov.32 d11[1], r10
+; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #176
+; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #144
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d18[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #624]
+; LE-NEON-NEXT: vmov.32 d16[1], r11
+; LE-NEON-NEXT: vmov.32 d9[1], r5
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #96
+; LE-NEON-NEXT: vmov.32 d19[1], r7
+; LE-NEON-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #512
+; LE-NEON-NEXT: str r0, [sp, #48] @ 4-byte Spill
+; LE-NEON-NEXT: str r1, [sp, #64] @ 4-byte Spill
+; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #768
+; LE-NEON-NEXT: mov r11, r0
+; LE-NEON-NEXT: str r1, [sp, #28] @ 4-byte Spill
+; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: ldr r6, [sp, #784]
+; LE-NEON-NEXT: add r3, sp, #788
+; LE-NEON-NEXT: mov r8, r1
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: mov r0, r6
+; LE-NEON-NEXT: ldr r5, [sp, #736]
+; LE-NEON-NEXT: ldr r7, [sp, #752]
+; LE-NEON-NEXT: ldr r4, [sp, #720]
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #740
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: mov r0, r5
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #756
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: mov r0, r7
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #724
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: mov r0, r4
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: vmov.32 d13[1], r7
+; LE-NEON-NEXT: add lr, sp, #8
+; LE-NEON-NEXT: ldr r2, [sp, #296]
+; LE-NEON-NEXT: vmov.32 d12[1], r5
+; LE-NEON-NEXT: ldr r3, [sp, #300]
+; LE-NEON-NEXT: ldr r4, [sp, #576]
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; LE-NEON-NEXT: ldr r10, [sp, #384]
+; LE-NEON-NEXT: vmov.32 d15[1], r6
+; LE-NEON-NEXT: ldr r6, [sp, #352]
+; LE-NEON-NEXT: vmov.32 d14[1], r8
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #32
+; LE-NEON-NEXT: vmov.32 d11[1], r1
+; LE-NEON-NEXT: ldr r1, [sp, #60] @ 4-byte Reload
+; LE-NEON-NEXT: vmov.32 d8[0], r11
+; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #176
+; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add lr, sp, #160
+; LE-NEON-NEXT: add r3, sp, #356
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: vmov.32 d16[0], r0
+; LE-NEON-NEXT: mov r0, r6
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add lr, sp, #112
+; LE-NEON-NEXT: add r3, sp, #388
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: mov r0, r10
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add lr, sp, #128
+; LE-NEON-NEXT: add r3, sp, #580
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: mov r0, r4
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add lr, sp, #80
+; LE-NEON-NEXT: add r3, sp, #708
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #704]
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: vmov.32 d8[1], r4
+; LE-NEON-NEXT: add lr, sp, #80
+; LE-NEON-NEXT: ldr r2, [sp, #52] @ 4-byte Reload
+; LE-NEON-NEXT: vmov.32 d12[1], r6
+; LE-NEON-NEXT: ldr r6, [sp, #644]
+; LE-NEON-NEXT: ldr r3, [sp, #652]
+; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #128
+; LE-NEON-NEXT: vmov.32 d14[1], r7
+; LE-NEON-NEXT: ldr r4, [sp, #480]
+; LE-NEON-NEXT: ldr r7, [sp, #656]
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #112
+; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #160
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
+; LE-NEON-NEXT: ldr r10, [sp, #496]
+; LE-NEON-NEXT: vmov.32 d16[1], r5
+; LE-NEON-NEXT: add r5, r9, #192
+; LE-NEON-NEXT: ldr r8, [sp, #608]
+; LE-NEON-NEXT: vmov.32 d10[1], r1
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #176
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vmov.32 d16[1], r0
+; LE-NEON-NEXT: ldr r0, [sp, #640]
+; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #96
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #8
+; LE-NEON-NEXT: vmov.32 d16[1], r2
+; LE-NEON-NEXT: ldr r2, [sp, #648]
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; LE-NEON-NEXT: vst1.64 {d10, d11}, [r5:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; LE-NEON-NEXT: ldr r1, [sp, #48] @ 4-byte Reload
+; LE-NEON-NEXT: vmov.32 d9[0], r1
+; LE-NEON-NEXT: mov r1, r6
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #660
+; LE-NEON-NEXT: mov r11, r1
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: mov r0, r7
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #484
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: mov r0, r4
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #500
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: vmov.32 d10[0], r0
+; LE-NEON-NEXT: mov r0, r10
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #612
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d11[0], r0
+; LE-NEON-NEXT: mov r0, r8
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #64] @ 4-byte Reload
+; LE-NEON-NEXT: add lr, sp, #96
+; LE-NEON-NEXT: add r8, r9, #128
+; LE-NEON-NEXT: vmov.32 d13[1], r7
+; LE-NEON-NEXT: ldr r2, [sp, #344]
+; LE-NEON-NEXT: ldr r3, [sp, #348]
+; LE-NEON-NEXT: vmov.32 d12[1], r11
+; LE-NEON-NEXT: ldr r7, [sp, #452]
+; LE-NEON-NEXT: ldr r10, [sp, #416]
+; LE-NEON-NEXT: vmov.32 d9[1], r0
+; LE-NEON-NEXT: ldr r0, [sp, #336]
+; LE-NEON-NEXT: vmov.32 d8[1], r1
+; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #64
+; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT: add lr, sp, #32
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #144
+; LE-NEON-NEXT: vmov.32 d11[1], r4
+; LE-NEON-NEXT: ldr r4, [sp, #340]
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]
+; LE-NEON-NEXT: mov r1, r4
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #80
+; LE-NEON-NEXT: vmov.32 d10[1], r6
+; LE-NEON-NEXT: ldr r6, [sp, #448]
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]!
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: ldr r2, [sp, #456]
+; LE-NEON-NEXT: mov r11, r1
+; LE-NEON-NEXT: ldr r3, [sp, #460]
+; LE-NEON-NEXT: vmov.32 d15[0], r0
+; LE-NEON-NEXT: mov r0, r6
+; LE-NEON-NEXT: mov r1, r7
+; LE-NEON-NEXT: ldr r5, [sp, #432]
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #468
+; LE-NEON-NEXT: vmov.32 d12[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #464]
+; LE-NEON-NEXT: mov r6, r1
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #420
+; LE-NEON-NEXT: mov r7, r1
+; LE-NEON-NEXT: vmov.32 d13[0], r0
+; LE-NEON-NEXT: mov r0, r10
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #436
+; LE-NEON-NEXT: mov r4, r1
+; LE-NEON-NEXT: vmov.32 d8[0], r0
+; LE-NEON-NEXT: mov r0, r5
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add r3, sp, #324
+; LE-NEON-NEXT: vmov.32 d9[0], r0
+; LE-NEON-NEXT: ldr r0, [sp, #320]
+; LE-NEON-NEXT: mov r5, r1
+; LE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT: bl llrintl
+; LE-NEON-NEXT: add lr, sp, #64
+; LE-NEON-NEXT: vmov.32 d9[1], r5
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #96
+; LE-NEON-NEXT: vmov.32 d13[1], r7
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #176
+; LE-NEON-NEXT: vmov.32 d8[1], r4
+; LE-NEON-NEXT: vmov.32 d12[1], r6
+; LE-NEON-NEXT: vmov.32 d14[0], r0
+; LE-NEON-NEXT: add r0, r9, #64
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]
+; LE-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]!
+; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #160
+; LE-NEON-NEXT: vmov.32 d15[1], r11
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #112
+; LE-NEON-NEXT: vmov.32 d14[1], r1
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r9:128]!
+; LE-NEON-NEXT: vst1.64 {d14, d15}, [r9:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: add lr, sp, #128
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r9:128]!
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT: vst1.64 {d16, d17}, [r9:128]
+; LE-NEON-NEXT: add sp, sp, #192
+; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT: add sp, sp, #4
+; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v32f128:
+; BE: @ %bb.0:
+; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT: .pad #4
+; BE-NEXT: sub sp, sp, #4
+; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: .pad #152
+; BE-NEXT: sub sp, sp, #152
+; BE-NEXT: str r3, [sp, #120] @ 4-byte Spill
+; BE-NEXT: add r3, sp, #712
+; BE-NEXT: str r2, [sp, #112] @ 4-byte Spill
+; BE-NEXT: mov r9, r0
+; BE-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: ldr r7, [sp, #648]
+; BE-NEXT: add r3, sp, #652
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vmov.32 d9[0], r0
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: mov r0, r7
+; BE-NEXT: ldr r6, [sp, #520]
+; BE-NEXT: ldr r8, [sp, #632]
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #524
+; BE-NEXT: mov r7, r1
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: mov r0, r6
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #636
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d8[0], r0
+; BE-NEXT: mov r0, r8
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: ldr r0, [sp, #488]
+; BE-NEXT: vmov.32 d8[1], r4
+; BE-NEXT: ldr r1, [sp, #492]
+; BE-NEXT: ldr r2, [sp, #496]
+; BE-NEXT: vmov.32 d10[1], r7
+; BE-NEXT: ldr r3, [sp, #500]
+; BE-NEXT: vmov.32 d9[1], r5
+; BE-NEXT: vstr d8, [sp, #144] @ 8-byte Spill
+; BE-NEXT: vstr d10, [sp, #136] @ 8-byte Spill
+; BE-NEXT: vstr d9, [sp, #128] @ 8-byte Spill
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #680
+; BE-NEXT: str r0, [sp, #104] @ 4-byte Spill
+; BE-NEXT: str r1, [sp, #88] @ 4-byte Spill
+; BE-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: ldr r0, [sp, #728]
+; BE-NEXT: ldr r2, [sp, #736]
+; BE-NEXT: vmov.32 d11[1], r6
+; BE-NEXT: ldr r6, [sp, #732]
+; BE-NEXT: ldr r3, [sp, #740]
+; BE-NEXT: vmov.32 d16[1], r1
+; BE-NEXT: ldr r5, [sp, #504]
+; BE-NEXT: mov r1, r6
+; BE-NEXT: ldr r7, [sp, #744]
+; BE-NEXT: ldr r4, [sp, #748]
+; BE-NEXT: vstr d11, [sp, #24] @ 8-byte Spill
+; BE-NEXT: vstr d16, [sp, #8] @ 8-byte Spill
+; BE-NEXT: bl llrintl
+; BE-NEXT: ldr r2, [sp, #752]
+; BE-NEXT: mov r11, r1
+; BE-NEXT: ldr r3, [sp, #756]
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: mov r0, r7
+; BE-NEXT: mov r1, r4
+; BE-NEXT: ldr r10, [sp, #552]
+; BE-NEXT: ldr r6, [sp, #664]
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #508
+; BE-NEXT: mov r8, r1
+; BE-NEXT: vmov.32 d8[0], r0
+; BE-NEXT: mov r0, r5
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #540
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: ldr r0, [sp, #536]
+; BE-NEXT: mov r7, r1
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #556
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: mov r0, r10
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #668
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d9[0], r0
+; BE-NEXT: mov r0, r6
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #700
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: ldr r0, [sp, #696]
+; BE-NEXT: mov r6, r1
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: ldr r0, [sp, #104] @ 4-byte Reload
+; BE-NEXT: ldr r2, [sp, #256]
+; BE-NEXT: vmov.32 d13[1], r11
+; BE-NEXT: ldr r3, [sp, #260]
+; BE-NEXT: vmov.32 d14[1], r6
+; BE-NEXT: ldr r6, [sp, #264]
+; BE-NEXT: vmov.32 d9[1], r4
+; BE-NEXT: ldr r4, [sp, #344]
+; BE-NEXT: vmov.32 d12[1], r5
+; BE-NEXT: ldr r5, [sp, #312]
+; BE-NEXT: vmov.32 d8[1], r8
+; BE-NEXT: ldr r8, [sp, #328]
+; BE-NEXT: vmov.32 d10[1], r7
+; BE-NEXT: vstr d13, [sp, #32] @ 8-byte Spill
+; BE-NEXT: vmov.32 d11[1], r1
+; BE-NEXT: ldr r1, [sp, #120] @ 4-byte Reload
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: ldr r0, [sp, #112] @ 4-byte Reload
+; BE-NEXT: vstr d14, [sp] @ 8-byte Spill
+; BE-NEXT: vstr d9, [sp, #16] @ 8-byte Spill
+; BE-NEXT: vstr d12, [sp, #56] @ 8-byte Spill
+; BE-NEXT: vstr d10, [sp, #64] @ 8-byte Spill
+; BE-NEXT: vstr d8, [sp, #40] @ 8-byte Spill
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #268
+; BE-NEXT: mov r11, r1
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: mov r0, r6
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #316
+; BE-NEXT: mov r10, r1
+; BE-NEXT: vmov.32 d8[0], r0
+; BE-NEXT: mov r0, r5
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #332
+; BE-NEXT: mov r7, r1
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: mov r0, r8
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #348
+; BE-NEXT: mov r5, r1
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: mov r0, r4
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #364
+; BE-NEXT: vmov.32 d9[0], r0
+; BE-NEXT: ldr r0, [sp, #360]
+; BE-NEXT: mov r4, r1
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #476
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: ldr r0, [sp, #472]
+; BE-NEXT: mov r6, r1
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: vmov.32 d16[0], r0
+; BE-NEXT: ldr r0, [sp, #88] @ 4-byte Reload
+; BE-NEXT: ldr r2, [sp, #592]
+; BE-NEXT: vldr d20, [sp, #136] @ 8-byte Reload
+; BE-NEXT: vmov.32 d16[1], r1
+; BE-NEXT: ldr r1, [sp, #588]
+; BE-NEXT: ldr r3, [sp, #596]
+; BE-NEXT: vldr d22, [sp, #24] @ 8-byte Reload
+; BE-NEXT: vldr d18, [sp, #8] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d21, d20
+; BE-NEXT: vmov.32 d10[1], r6
+; BE-NEXT: ldr r6, [sp, #600]
+; BE-NEXT: vmov.32 d9[1], r4
+; BE-NEXT: ldr r4, [sp, #616]
+; BE-NEXT: vmov.32 d12[1], r7
+; BE-NEXT: ldr r7, [sp, #604]
+; BE-NEXT: vmov.32 d8[1], r10
+; BE-NEXT: add r10, r9, #192
+; BE-NEXT: vmov.32 d14[1], r11
+; BE-NEXT: ldr r11, [sp, #440]
+; BE-NEXT: vmov.32 d13[1], r0
+; BE-NEXT: ldr r0, [sp, #584]
+; BE-NEXT: vmov.32 d15[1], r5
+; BE-NEXT: vstr d16, [sp, #48] @ 8-byte Spill
+; BE-NEXT: vldr d16, [sp, #128] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d20, d22
+; BE-NEXT: vldr d22, [sp] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d19, d18
+; BE-NEXT: vrev64.32 d17, d16
+; BE-NEXT: vrev64.32 d18, d22
+; BE-NEXT: vstr d10, [sp, #120] @ 8-byte Spill
+; BE-NEXT: vstr d9, [sp, #112] @ 8-byte Spill
+; BE-NEXT: vstr d15, [sp, #104] @ 8-byte Spill
+; BE-NEXT: vstr d12, [sp, #96] @ 8-byte Spill
+; BE-NEXT: vstr d8, [sp, #80] @ 8-byte Spill
+; BE-NEXT: vstr d14, [sp, #72] @ 8-byte Spill
+; BE-NEXT: vstr d13, [sp, #88] @ 8-byte Spill
+; BE-NEXT: vst1.64 {d20, d21}, [r10:128]!
+; BE-NEXT: vrev64.32 d16, d11
+; BE-NEXT: vst1.64 {d18, d19}, [r10:128]!
+; BE-NEXT: vst1.64 {d16, d17}, [r10:128]!
+; BE-NEXT: bl llrintl
+; BE-NEXT: ldr r2, [sp, #608]
+; BE-NEXT: mov r8, r1
+; BE-NEXT: ldr r3, [sp, #612]
+; BE-NEXT: vmov.32 d8[0], r0
+; BE-NEXT: mov r0, r6
+; BE-NEXT: mov r1, r7
+; BE-NEXT: ldr r5, [sp, #456]
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #620
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vmov.32 d9[0], r0
+; BE-NEXT: mov r0, r4
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #444
+; BE-NEXT: mov r7, r1
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: mov r0, r11
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #460
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d11[0], r0
+; BE-NEXT: mov r0, r5
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #572
+; BE-NEXT: vmov.32 d13[0], r0
+; BE-NEXT: ldr r0, [sp, #568]
+; BE-NEXT: mov r5, r1
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: vldr d16, [sp, #16] @ 8-byte Reload
+; BE-NEXT: vldr d18, [sp, #56] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d17, d16
+; BE-NEXT: ldr r2, [sp, #304]
+; BE-NEXT: vrev64.32 d16, d18
+; BE-NEXT: ldr r3, [sp, #308]
+; BE-NEXT: vldr d18, [sp, #144] @ 8-byte Reload
+; BE-NEXT: vldr d20, [sp, #64] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d19, d18
+; BE-NEXT: vrev64.32 d18, d20
+; BE-NEXT: vldr d20, [sp, #40] @ 8-byte Reload
+; BE-NEXT: vldr d22, [sp, #32] @ 8-byte Reload
+; BE-NEXT: vmov.32 d14[0], r0
+; BE-NEXT: ldr r0, [sp, #296]
+; BE-NEXT: vmov.32 d10[1], r7
+; BE-NEXT: ldr r7, [sp, #412]
+; BE-NEXT: vmov.32 d9[1], r6
+; BE-NEXT: ldr r6, [sp, #408]
+; BE-NEXT: vmov.32 d8[1], r8
+; BE-NEXT: add r8, r9, #128
+; BE-NEXT: vrev64.32 d21, d20
+; BE-NEXT: vmov.32 d13[1], r5
+; BE-NEXT: ldr r5, [sp, #300]
+; BE-NEXT: vrev64.32 d20, d22
+; BE-NEXT: vmov.32 d14[1], r1
+; BE-NEXT: mov r1, r5
+; BE-NEXT: vstr d10, [sp, #136] @ 8-byte Spill
+; BE-NEXT: vstr d9, [sp, #128] @ 8-byte Spill
+; BE-NEXT: vstr d8, [sp, #24] @ 8-byte Spill
+; BE-NEXT: vst1.64 {d20, d21}, [r10:128]
+; BE-NEXT: vst1.64 {d18, d19}, [r8:128]!
+; BE-NEXT: vmov.32 d11[1], r4
+; BE-NEXT: ldr r4, [sp, #424]
+; BE-NEXT: ldr r10, [sp, #376]
+; BE-NEXT: vst1.64 {d16, d17}, [r8:128]!
+; BE-NEXT: bl llrintl
+; BE-NEXT: ldr r2, [sp, #416]
+; BE-NEXT: mov r11, r1
+; BE-NEXT: ldr r3, [sp, #420]
+; BE-NEXT: vmov.32 d15[0], r0
+; BE-NEXT: mov r0, r6
+; BE-NEXT: mov r1, r7
+; BE-NEXT: ldr r5, [sp, #392]
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #428
+; BE-NEXT: mov r6, r1
+; BE-NEXT: vmov.32 d8[0], r0
+; BE-NEXT: mov r0, r4
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #380
+; BE-NEXT: mov r7, r1
+; BE-NEXT: vmov.32 d9[0], r0
+; BE-NEXT: mov r0, r10
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #396
+; BE-NEXT: mov r4, r1
+; BE-NEXT: vmov.32 d12[0], r0
+; BE-NEXT: mov r0, r5
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: add r3, sp, #284
+; BE-NEXT: vmov.32 d10[0], r0
+; BE-NEXT: ldr r0, [sp, #280]
+; BE-NEXT: mov r5, r1
+; BE-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEXT: bl llrintl
+; BE-NEXT: vldr d16, [sp, #120] @ 8-byte Reload
+; BE-NEXT: vldr d18, [sp, #112] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d17, d16
+; BE-NEXT: vldr d26, [sp, #136] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d16, d18
+; BE-NEXT: vldr d18, [sp, #104] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d31, d26
+; BE-NEXT: vldr d26, [sp, #128] @ 8-byte Reload
+; BE-NEXT: vldr d20, [sp, #96] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d19, d18
+; BE-NEXT: vrev64.32 d18, d20
+; BE-NEXT: vldr d20, [sp, #80] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d30, d26
+; BE-NEXT: vldr d26, [sp, #24] @ 8-byte Reload
+; BE-NEXT: vmov.32 d10[1], r5
+; BE-NEXT: vldr d22, [sp, #72] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d21, d20
+; BE-NEXT: vrev64.32 d1, d26
+; BE-NEXT: vmov.32 d9[1], r7
+; BE-NEXT: vmov.32 d12[1], r4
+; BE-NEXT: vrev64.32 d20, d22
+; BE-NEXT: vldr d22, [sp, #88] @ 8-byte Reload
+; BE-NEXT: vmov.32 d8[1], r6
+; BE-NEXT: vrev64.32 d0, d14
+; BE-NEXT: vmov.32 d28[0], r0
+; BE-NEXT: add r0, r9, #64
+; BE-NEXT: vrev64.32 d3, d10
+; BE-NEXT: vldr d24, [sp, #48] @ 8-byte Reload
+; BE-NEXT: vrev64.32 d23, d22
+; BE-NEXT: vrev64.32 d5, d9
+; BE-NEXT: vst1.64 {d0, d1}, [r8:128]!
+; BE-NEXT: vrev64.32 d2, d12
+; BE-NEXT: vmov.32 d15[1], r11
+; BE-NEXT: vrev64.32 d22, d24
+; BE-NEXT: vrev64.32 d25, d13
+; BE-NEXT: vrev64.32 d4, d8
+; BE-NEXT: vst1.64 {d30, d31}, [r8:128]
+; BE-NEXT: vst1.64 {d2, d3}, [r0:128]!
+; BE-NEXT: vmov.32 d28[1], r1
+; BE-NEXT: vrev64.32 d24, d11
+; BE-NEXT: vst1.64 {d4, d5}, [r0:128]!
+; BE-NEXT: vrev64.32 d27, d15
+; BE-NEXT: vst1.64 {d24, d25}, [r0:128]!
+; BE-NEXT: vrev64.32 d26, d28
+; BE-NEXT: vst1.64 {d22, d23}, [r0:128]
+; BE-NEXT: vst1.64 {d20, d21}, [r9:128]!
+; BE-NEXT: vst1.64 {d26, d27}, [r9:128]!
+; BE-NEXT: vst1.64 {d18, d19}, [r9:128]!
+; BE-NEXT: vst1.64 {d16, d17}, [r9:128]
+; BE-NEXT: add sp, sp, #152
+; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT: add sp, sp, #4
+; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v32f128:
+; BE-NEON: @ %bb.0:
+; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT: .pad #4
+; BE-NEON-NEXT: sub sp, sp, #4
+; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: .pad #152
+; BE-NEON-NEXT: sub sp, sp, #152
+; BE-NEON-NEXT: str r3, [sp, #120] @ 4-byte Spill
+; BE-NEON-NEXT: add r3, sp, #712
+; BE-NEON-NEXT: str r2, [sp, #112] @ 4-byte Spill
+; BE-NEON-NEXT: mov r9, r0
+; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: ldr r7, [sp, #648]
+; BE-NEON-NEXT: add r3, sp, #652
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vmov.32 d9[0], r0
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: mov r0, r7
+; BE-NEON-NEXT: ldr r6, [sp, #520]
+; BE-NEON-NEXT: ldr r8, [sp, #632]
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #524
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: mov r0, r6
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #636
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d8[0], r0
+; BE-NEON-NEXT: mov r0, r8
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #488]
+; BE-NEON-NEXT: vmov.32 d8[1], r4
+; BE-NEON-NEXT: ldr r1, [sp, #492]
+; BE-NEON-NEXT: ldr r2, [sp, #496]
+; BE-NEON-NEXT: vmov.32 d10[1], r7
+; BE-NEON-NEXT: ldr r3, [sp, #500]
+; BE-NEON-NEXT: vmov.32 d9[1], r5
+; BE-NEON-NEXT: vstr d8, [sp, #144] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d10, [sp, #136] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d9, [sp, #128] @ 8-byte Spill
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #680
+; BE-NEON-NEXT: str r0, [sp, #104] @ 4-byte Spill
+; BE-NEON-NEXT: str r1, [sp, #88] @ 4-byte Spill
+; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #728]
+; BE-NEON-NEXT: ldr r2, [sp, #736]
+; BE-NEON-NEXT: vmov.32 d11[1], r6
+; BE-NEON-NEXT: ldr r6, [sp, #732]
+; BE-NEON-NEXT: ldr r3, [sp, #740]
+; BE-NEON-NEXT: vmov.32 d16[1], r1
+; BE-NEON-NEXT: ldr r5, [sp, #504]
+; BE-NEON-NEXT: mov r1, r6
+; BE-NEON-NEXT: ldr r7, [sp, #744]
+; BE-NEON-NEXT: ldr r4, [sp, #748]
+; BE-NEON-NEXT: vstr d11, [sp, #24] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d16, [sp, #8] @ 8-byte Spill
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: ldr r2, [sp, #752]
+; BE-NEON-NEXT: mov r11, r1
+; BE-NEON-NEXT: ldr r3, [sp, #756]
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: mov r0, r7
+; BE-NEON-NEXT: mov r1, r4
+; BE-NEON-NEXT: ldr r10, [sp, #552]
+; BE-NEON-NEXT: ldr r6, [sp, #664]
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #508
+; BE-NEON-NEXT: mov r8, r1
+; BE-NEON-NEXT: vmov.32 d8[0], r0
+; BE-NEON-NEXT: mov r0, r5
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #540
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #536]
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #556
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: mov r0, r10
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #668
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d9[0], r0
+; BE-NEON-NEXT: mov r0, r6
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #700
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #696]
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #104] @ 4-byte Reload
+; BE-NEON-NEXT: ldr r2, [sp, #256]
+; BE-NEON-NEXT: vmov.32 d13[1], r11
+; BE-NEON-NEXT: ldr r3, [sp, #260]
+; BE-NEON-NEXT: vmov.32 d14[1], r6
+; BE-NEON-NEXT: ldr r6, [sp, #264]
+; BE-NEON-NEXT: vmov.32 d9[1], r4
+; BE-NEON-NEXT: ldr r4, [sp, #344]
+; BE-NEON-NEXT: vmov.32 d12[1], r5
+; BE-NEON-NEXT: ldr r5, [sp, #312]
+; BE-NEON-NEXT: vmov.32 d8[1], r8
+; BE-NEON-NEXT: ldr r8, [sp, #328]
+; BE-NEON-NEXT: vmov.32 d10[1], r7
+; BE-NEON-NEXT: vstr d13, [sp, #32] @ 8-byte Spill
+; BE-NEON-NEXT: vmov.32 d11[1], r1
+; BE-NEON-NEXT: ldr r1, [sp, #120] @ 4-byte Reload
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #112] @ 4-byte Reload
+; BE-NEON-NEXT: vstr d14, [sp] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d9, [sp, #16] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d12, [sp, #56] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d10, [sp, #64] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d8, [sp, #40] @ 8-byte Spill
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #268
+; BE-NEON-NEXT: mov r11, r1
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: mov r0, r6
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #316
+; BE-NEON-NEXT: mov r10, r1
+; BE-NEON-NEXT: vmov.32 d8[0], r0
+; BE-NEON-NEXT: mov r0, r5
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #332
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: mov r0, r8
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #348
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: mov r0, r4
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #364
+; BE-NEON-NEXT: vmov.32 d9[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #360]
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #476
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #472]
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: vmov.32 d16[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #88] @ 4-byte Reload
+; BE-NEON-NEXT: ldr r2, [sp, #592]
+; BE-NEON-NEXT: vldr d20, [sp, #136] @ 8-byte Reload
+; BE-NEON-NEXT: vmov.32 d16[1], r1
+; BE-NEON-NEXT: ldr r1, [sp, #588]
+; BE-NEON-NEXT: ldr r3, [sp, #596]
+; BE-NEON-NEXT: vldr d22, [sp, #24] @ 8-byte Reload
+; BE-NEON-NEXT: vldr d18, [sp, #8] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d21, d20
+; BE-NEON-NEXT: vmov.32 d10[1], r6
+; BE-NEON-NEXT: ldr r6, [sp, #600]
+; BE-NEON-NEXT: vmov.32 d9[1], r4
+; BE-NEON-NEXT: ldr r4, [sp, #616]
+; BE-NEON-NEXT: vmov.32 d12[1], r7
+; BE-NEON-NEXT: ldr r7, [sp, #604]
+; BE-NEON-NEXT: vmov.32 d8[1], r10
+; BE-NEON-NEXT: add r10, r9, #192
+; BE-NEON-NEXT: vmov.32 d14[1], r11
+; BE-NEON-NEXT: ldr r11, [sp, #440]
+; BE-NEON-NEXT: vmov.32 d13[1], r0
+; BE-NEON-NEXT: ldr r0, [sp, #584]
+; BE-NEON-NEXT: vmov.32 d15[1], r5
+; BE-NEON-NEXT: vstr d16, [sp, #48] @ 8-byte Spill
+; BE-NEON-NEXT: vldr d16, [sp, #128] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d20, d22
+; BE-NEON-NEXT: vldr d22, [sp] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d19, d18
+; BE-NEON-NEXT: vrev64.32 d17, d16
+; BE-NEON-NEXT: vrev64.32 d18, d22
+; BE-NEON-NEXT: vstr d10, [sp, #120] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d9, [sp, #112] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d15, [sp, #104] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d12, [sp, #96] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d8, [sp, #80] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d14, [sp, #72] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d13, [sp, #88] @ 8-byte Spill
+; BE-NEON-NEXT: vst1.64 {d20, d21}, [r10:128]!
+; BE-NEON-NEXT: vrev64.32 d16, d11
+; BE-NEON-NEXT: vst1.64 {d18, d19}, [r10:128]!
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]!
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: ldr r2, [sp, #608]
+; BE-NEON-NEXT: mov r8, r1
+; BE-NEON-NEXT: ldr r3, [sp, #612]
+; BE-NEON-NEXT: vmov.32 d8[0], r0
+; BE-NEON-NEXT: mov r0, r6
+; BE-NEON-NEXT: mov r1, r7
+; BE-NEON-NEXT: ldr r5, [sp, #456]
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #620
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vmov.32 d9[0], r0
+; BE-NEON-NEXT: mov r0, r4
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #444
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: mov r0, r11
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #460
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d11[0], r0
+; BE-NEON-NEXT: mov r0, r5
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #572
+; BE-NEON-NEXT: vmov.32 d13[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #568]
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: vldr d16, [sp, #16] @ 8-byte Reload
+; BE-NEON-NEXT: vldr d18, [sp, #56] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d17, d16
+; BE-NEON-NEXT: ldr r2, [sp, #304]
+; BE-NEON-NEXT: vrev64.32 d16, d18
+; BE-NEON-NEXT: ldr r3, [sp, #308]
+; BE-NEON-NEXT: vldr d18, [sp, #144] @ 8-byte Reload
+; BE-NEON-NEXT: vldr d20, [sp, #64] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d19, d18
+; BE-NEON-NEXT: vrev64.32 d18, d20
+; BE-NEON-NEXT: vldr d20, [sp, #40] @ 8-byte Reload
+; BE-NEON-NEXT: vldr d22, [sp, #32] @ 8-byte Reload
+; BE-NEON-NEXT: vmov.32 d14[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #296]
+; BE-NEON-NEXT: vmov.32 d10[1], r7
+; BE-NEON-NEXT: ldr r7, [sp, #412]
+; BE-NEON-NEXT: vmov.32 d9[1], r6
+; BE-NEON-NEXT: ldr r6, [sp, #408]
+; BE-NEON-NEXT: vmov.32 d8[1], r8
+; BE-NEON-NEXT: add r8, r9, #128
+; BE-NEON-NEXT: vrev64.32 d21, d20
+; BE-NEON-NEXT: vmov.32 d13[1], r5
+; BE-NEON-NEXT: ldr r5, [sp, #300]
+; BE-NEON-NEXT: vrev64.32 d20, d22
+; BE-NEON-NEXT: vmov.32 d14[1], r1
+; BE-NEON-NEXT: mov r1, r5
+; BE-NEON-NEXT: vstr d10, [sp, #136] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d9, [sp, #128] @ 8-byte Spill
+; BE-NEON-NEXT: vstr d8, [sp, #24] @ 8-byte Spill
+; BE-NEON-NEXT: vst1.64 {d20, d21}, [r10:128]
+; BE-NEON-NEXT: vst1.64 {d18, d19}, [r8:128]!
+; BE-NEON-NEXT: vmov.32 d11[1], r4
+; BE-NEON-NEXT: ldr r4, [sp, #424]
+; BE-NEON-NEXT: ldr r10, [sp, #376]
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]!
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: ldr r2, [sp, #416]
+; BE-NEON-NEXT: mov r11, r1
+; BE-NEON-NEXT: ldr r3, [sp, #420]
+; BE-NEON-NEXT: vmov.32 d15[0], r0
+; BE-NEON-NEXT: mov r0, r6
+; BE-NEON-NEXT: mov r1, r7
+; BE-NEON-NEXT: ldr r5, [sp, #392]
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #428
+; BE-NEON-NEXT: mov r6, r1
+; BE-NEON-NEXT: vmov.32 d8[0], r0
+; BE-NEON-NEXT: mov r0, r4
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #380
+; BE-NEON-NEXT: mov r7, r1
+; BE-NEON-NEXT: vmov.32 d9[0], r0
+; BE-NEON-NEXT: mov r0, r10
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #396
+; BE-NEON-NEXT: mov r4, r1
+; BE-NEON-NEXT: vmov.32 d12[0], r0
+; BE-NEON-NEXT: mov r0, r5
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: add r3, sp, #284
+; BE-NEON-NEXT: vmov.32 d10[0], r0
+; BE-NEON-NEXT: ldr r0, [sp, #280]
+; BE-NEON-NEXT: mov r5, r1
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT: bl llrintl
+; BE-NEON-NEXT: vldr d16, [sp, #120] @ 8-byte Reload
+; BE-NEON-NEXT: vldr d18, [sp, #112] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d17, d16
+; BE-NEON-NEXT: vldr d26, [sp, #136] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d16, d18
+; BE-NEON-NEXT: vldr d18, [sp, #104] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d31, d26
+; BE-NEON-NEXT: vldr d26, [sp, #128] @ 8-byte Reload
+; BE-NEON-NEXT: vldr d20, [sp, #96] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d19, d18
+; BE-NEON-NEXT: vrev64.32 d18, d20
+; BE-NEON-NEXT: vldr d20, [sp, #80] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d30, d26
+; BE-NEON-NEXT: vldr d26, [sp, #24] @ 8-byte Reload
+; BE-NEON-NEXT: vmov.32 d10[1], r5
+; BE-NEON-NEXT: vldr d22, [sp, #72] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d21, d20
+; BE-NEON-NEXT: vrev64.32 d1, d26
+; BE-NEON-NEXT: vmov.32 d9[1], r7
+; BE-NEON-NEXT: vmov.32 d12[1], r4
+; BE-NEON-NEXT: vrev64.32 d20, d22
+; BE-NEON-NEXT: vldr d22, [sp, #88] @ 8-byte Reload
+; BE-NEON-NEXT: vmov.32 d8[1], r6
+; BE-NEON-NEXT: vrev64.32 d0, d14
+; BE-NEON-NEXT: vmov.32 d28[0], r0
+; BE-NEON-NEXT: add r0, r9, #64
+; BE-NEON-NEXT: vrev64.32 d3, d10
+; BE-NEON-NEXT: vldr d24, [sp, #48] @ 8-byte Reload
+; BE-NEON-NEXT: vrev64.32 d23, d22
+; BE-NEON-NEXT: vrev64.32 d5, d9
+; BE-NEON-NEXT: vst1.64 {d0, d1}, [r8:128]!
+; BE-NEON-NEXT: vrev64.32 d2, d12
+; BE-NEON-NEXT: vmov.32 d15[1], r11
+; BE-NEON-NEXT: vrev64.32 d22, d24
+; BE-NEON-NEXT: vrev64.32 d25, d13
+; BE-NEON-NEXT: vrev64.32 d4, d8
+; BE-NEON-NEXT: vst1.64 {d30, d31}, [r8:128]
+; BE-NEON-NEXT: vst1.64 {d2, d3}, [r0:128]!
+; BE-NEON-NEXT: vmov.32 d28[1], r1
+; BE-NEON-NEXT: vrev64.32 d24, d11
+; BE-NEON-NEXT: vst1.64 {d4, d5}, [r0:128]!
+; BE-NEON-NEXT: vrev64.32 d27, d15
+; BE-NEON-NEXT: vst1.64 {d24, d25}, [r0:128]!
+; BE-NEON-NEXT: vrev64.32 d26, d28
+; BE-NEON-NEXT: vst1.64 {d22, d23}, [r0:128]
+; BE-NEON-NEXT: vst1.64 {d20, d21}, [r9:128]!
+; BE-NEON-NEXT: vst1.64 {d26, d27}, [r9:128]!
+; BE-NEON-NEXT: vst1.64 {d18, d19}, [r9:128]!
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r9:128]
+; BE-NEON-NEXT: add sp, sp, #152
+; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT: add sp, sp, #4
+; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <32 x i64> @llvm.llrint.v32i64.v32f128(<32 x fp128> %x)
+ ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.llrint.v32i64.v32f128(<32 x fp128>)
diff --git a/llvm/test/CodeGen/ARM/vector-lrint.ll b/llvm/test/CodeGen/ARM/vector-lrint.ll
new file mode 100644
index 0000000000000..50c8b9ff6d913
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/vector-lrint.ll
@@ -0,0 +1,13251 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf | FileCheck %s --check-prefix=LE-I32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf | FileCheck %s --check-prefix=LE-I64
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=LE-I32-NEON
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=LE-I64-NEON
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf | FileCheck %s --check-prefix=BE-I32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf | FileCheck %s --check-prefix=BE-I64
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-I32-NEON
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-I64-NEON
+
+; FIXME: crash "Do not know how to soft promote this operator's operand!"
+; define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
+; %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
+; ret <1 x iXLen> %a
+; }
+; declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>)
+
+; define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
+; %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x)
+; ret <2 x iXLen> %a
+; }
+; declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>)
+
+; define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
+; %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x)
+; ret <4 x iXLen> %a
+; }
+; declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>)
+
+; define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
+; %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x)
+; ret <8 x iXLen> %a
+; }
+; declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>)
+
+; define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
+; %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x)
+; ret <16 x iXLen> %a
+; }
+; declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>)
+
+; define <32 x iXLen> @lrint_v32f16(<32 x half> %x) {
+; %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half> %x)
+; ret <32 x iXLen> %a
+; }
+; declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half>)
+
+; <1 x float> -> <1 x iXLen> scalarizes to a single lrintf libcall. For
+; iXLen=i64 the two 32-bit result halves (r0/r1) are packed into a d-register
+; lane pair, and the big-endian runs add a vrev64.32 to fix lane order.
+define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
+; LE-I32-LABEL: lrint_v1f32:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v1f32:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r11, lr}
+; LE-I64-NEXT: push {r11, lr}
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d0[0], r0
+; LE-I64-NEXT: vmov.32 d0[1], r1
+; LE-I64-NEXT: pop {r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v1f32:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r11, lr}
+; LE-I32-NEON-NEXT: push {r11, lr}
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v1f32:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r11, lr}
+; LE-I64-NEON-NEXT: push {r11, lr}
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d0[0], r0
+; LE-I64-NEON-NEXT: vmov.32 d0[1], r1
+; LE-I64-NEON-NEXT: pop {r11, pc}
+;
+; BE-I32-LABEL: lrint_v1f32:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v1f32:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r11, lr}
+; BE-I64-NEXT: push {r11, lr}
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d0, d16
+; BE-I64-NEXT: pop {r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v1f32:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r11, lr}
+; BE-I32-NEON-NEXT: push {r11, lr}
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v1f32:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r11, lr}
+; BE-I64-NEON-NEXT: push {r11, lr}
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 d0, d16
+; BE-I64-NEON-NEXT: pop {r11, pc}
+  %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x)
+  ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>)
+
+; <2 x float> -> <2 x iXLen>: two lrintf libcalls. The input vector is kept
+; live across the calls in a callee-saved d-register (d8) and the results are
+; packed lane-by-lane; big-endian runs vrev64.32 the input and final result.
+define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
+; LE-I32-LABEL: lrint_v2f32:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: .vsave {d8, d9}
+; LE-I32-NEXT: vpush {d8, d9}
+; LE-I32-NEXT: vmov.f64 d8, d0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s17
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEXT: vorr d0, d9, d9
+; LE-I32-NEXT: vpop {d8, d9}
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v2f32:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, lr}
+; LE-I64-NEXT: push {r4, lr}
+; LE-I64-NEXT: .vsave {d10, d11}
+; LE-I64-NEXT: vpush {d10, d11}
+; LE-I64-NEXT: .vsave {d8}
+; LE-I64-NEXT: vpush {d8}
+; LE-I64-NEXT: vmov.f64 d8, d0
+; LE-I64-NEXT: vmov.f32 s0, s17
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s16
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: vmov.32 d11[1], r4
+; LE-I64-NEXT: vmov.32 d10[1], r1
+; LE-I64-NEXT: vorr q0, q5, q5
+; LE-I64-NEXT: vpop {d8}
+; LE-I64-NEXT: vpop {d10, d11}
+; LE-I64-NEXT: pop {r4, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v2f32:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r11, lr}
+; LE-I32-NEON-NEXT: push {r11, lr}
+; LE-I32-NEON-NEXT: .vsave {d8, d9}
+; LE-I32-NEON-NEXT: vpush {d8, d9}
+; LE-I32-NEON-NEXT: vmov.f64 d8, d0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s17
+; LE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT: vorr d0, d9, d9
+; LE-I32-NEON-NEXT: vpop {d8, d9}
+; LE-I32-NEON-NEXT: pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v2f32:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r4, lr}
+; LE-I64-NEON-NEXT: push {r4, lr}
+; LE-I64-NEON-NEXT: .vsave {d10, d11}
+; LE-I64-NEON-NEXT: vpush {d10, d11}
+; LE-I64-NEON-NEXT: .vsave {d8}
+; LE-I64-NEON-NEXT: vpush {d8}
+; LE-I64-NEON-NEXT: vmov.f64 d8, d0
+; LE-I64-NEON-NEXT: vmov.f32 s0, s17
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s16
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r4
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r1
+; LE-I64-NEON-NEXT: vorr q0, q5, q5
+; LE-I64-NEON-NEXT: vpop {d8}
+; LE-I64-NEON-NEXT: vpop {d10, d11}
+; LE-I64-NEON-NEXT: pop {r4, pc}
+;
+; BE-I32-LABEL: lrint_v2f32:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: .vsave {d8, d9}
+; BE-I32-NEXT: vpush {d8, d9}
+; BE-I32-NEXT: vrev64.32 d8, d0
+; BE-I32-NEXT: vmov.f32 s0, s16
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s17
+; BE-I32-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEXT: vrev64.32 d0, d9
+; BE-I32-NEXT: vpop {d8, d9}
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v2f32:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, lr}
+; BE-I64-NEXT: push {r4, lr}
+; BE-I64-NEXT: .vsave {d10, d11}
+; BE-I64-NEXT: vpush {d10, d11}
+; BE-I64-NEXT: .vsave {d8}
+; BE-I64-NEXT: vpush {d8}
+; BE-I64-NEXT: vrev64.32 d8, d0
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: vmov.32 d11[1], r4
+; BE-I64-NEXT: vmov.32 d10[1], r1
+; BE-I64-NEXT: vrev64.32 q0, q5
+; BE-I64-NEXT: vpop {d8}
+; BE-I64-NEXT: vpop {d10, d11}
+; BE-I64-NEXT: pop {r4, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v2f32:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r11, lr}
+; BE-I32-NEON-NEXT: push {r11, lr}
+; BE-I32-NEON-NEXT: .vsave {d8, d9}
+; BE-I32-NEON-NEXT: vpush {d8, d9}
+; BE-I32-NEON-NEXT: vrev64.32 d8, d0
+; BE-I32-NEON-NEXT: vmov.f32 s0, s16
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s17
+; BE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT: vrev64.32 d0, d9
+; BE-I32-NEON-NEXT: vpop {d8, d9}
+; BE-I32-NEON-NEXT: pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v2f32:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r4, lr}
+; BE-I64-NEON-NEXT: push {r4, lr}
+; BE-I64-NEON-NEXT: .vsave {d10, d11}
+; BE-I64-NEON-NEXT: vpush {d10, d11}
+; BE-I64-NEON-NEXT: .vsave {d8}
+; BE-I64-NEON-NEXT: vpush {d8}
+; BE-I64-NEON-NEXT: vrev64.32 d8, d0
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 q0, q5
+; BE-I64-NEON-NEXT: vpop {d8}
+; BE-I64-NEON-NEXT: vpop {d10, d11}
+; BE-I64-NEON-NEXT: pop {r4, pc}
+  %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x)
+  ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>)
+
+; <4 x float> -> <4 x iXLen>: four lrintf libcalls with the source vector held
+; in callee-saved q4/q5 across calls. The i64 variant returns two q-registers
+; (q0/q1) and saves each call's r1 high half in a GPR until lane insertion;
+; big-endian runs vrev64.32 the input lanes and the assembled results.
+define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
+; LE-I32-LABEL: lrint_v4f32:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11}
+; LE-I32-NEXT: vorr q4, q0, q0
+; LE-I32-NEXT: vmov.f32 s0, s18
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s16
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s19
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s17
+; LE-I32-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vorr q0, q5, q5
+; LE-I32-NEXT: vpop {d8, d9, d10, d11}
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v4f32:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, lr}
+; LE-I64-NEXT: push {r4, r5, r6, lr}
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; LE-I64-NEXT: vorr q5, q0, q0
+; LE-I64-NEXT: vmov.f32 s0, s23
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s20
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s21
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s22
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov.32 d13[1], r6
+; LE-I64-NEXT: vmov.32 d9[1], r4
+; LE-I64-NEXT: vmov.32 d12[1], r5
+; LE-I64-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEXT: vorr q0, q6, q6
+; LE-I64-NEXT: vorr q1, q4, q4
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; LE-I64-NEXT: pop {r4, r5, r6, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v4f32:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r11, lr}
+; LE-I32-NEON-NEXT: push {r11, lr}
+; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11}
+; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11}
+; LE-I32-NEON-NEXT: vorr q4, q0, q0
+; LE-I32-NEON-NEXT: vmov.f32 s0, s18
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s16
+; LE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s19
+; LE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s17
+; LE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT: vorr q0, q5, q5
+; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11}
+; LE-I32-NEON-NEXT: pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v4f32:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r4, r5, r6, lr}
+; LE-I64-NEON-NEXT: push {r4, r5, r6, lr}
+; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
+; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; LE-I64-NEON-NEXT: vorr q5, q0, q0
+; LE-I64-NEON-NEXT: vmov.f32 s0, s23
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s20
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s21
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s22
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r6
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r4
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r5
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEON-NEXT: vorr q0, q6, q6
+; LE-I64-NEON-NEXT: vorr q1, q4, q4
+; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; LE-I64-NEON-NEXT: pop {r4, r5, r6, pc}
+;
+; BE-I32-LABEL: lrint_v4f32:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11}
+; BE-I32-NEXT: vrev64.32 q4, q0
+; BE-I32-NEXT: vmov.f32 s0, s18
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s16
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s19
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s17
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q5
+; BE-I32-NEXT: vpop {d8, d9, d10, d11}
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v4f32:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, lr}
+; BE-I64-NEXT: push {r4, r5, r6, lr}
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; BE-I64-NEXT: vrev64.32 d8, d1
+; BE-I64-NEXT: vrev64.32 d9, d0
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s18
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s19
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: vmov.32 d13[1], r6
+; BE-I64-NEXT: vmov.32 d11[1], r4
+; BE-I64-NEXT: vmov.32 d12[1], r5
+; BE-I64-NEXT: vmov.32 d10[1], r1
+; BE-I64-NEXT: vrev64.32 q0, q6
+; BE-I64-NEXT: vrev64.32 q1, q5
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; BE-I64-NEXT: pop {r4, r5, r6, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v4f32:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r11, lr}
+; BE-I32-NEON-NEXT: push {r11, lr}
+; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11}
+; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11}
+; BE-I32-NEON-NEXT: vrev64.32 q4, q0
+; BE-I32-NEON-NEXT: vmov.f32 s0, s18
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s16
+; BE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s19
+; BE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s17
+; BE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT: vrev64.32 q0, q5
+; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11}
+; BE-I32-NEON-NEXT: pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v4f32:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r4, r5, r6, lr}
+; BE-I64-NEON-NEXT: push {r4, r5, r6, lr}
+; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
+; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; BE-I64-NEON-NEXT: vrev64.32 d8, d1
+; BE-I64-NEON-NEXT: vrev64.32 d9, d0
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s18
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s19
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r6
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r5
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 q0, q6
+; BE-I64-NEON-NEXT: vrev64.32 q1, q5
+; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; BE-I64-NEON-NEXT: pop {r4, r5, r6, pc}
+  %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x)
+  ret <4 x iXLen> %a
+}
+declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>)
+
+define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { ; NOTE(review): CHECK lines below are autogenerated (update_llc_test_checks.py); do not hand-edit, regenerate instead
+; LE-I32-LABEL: lrint_v8f32:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r11, lr}
+; LE-I32-NEXT:    push {r11, lr}
+; LE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    vorr q5, q1, q1
+; LE-I32-NEXT:    vorr q7, q0, q0
+; LE-I32-NEXT:    vmov.f32 s0, s20
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s22
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s30
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s28
+; LE-I32-NEXT:    vmov.32 d13[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s31
+; LE-I32-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s29
+; LE-I32-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s23
+; LE-I32-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s21
+; LE-I32-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    vorr q0, q6, q6
+; LE-I32-NEXT:    vorr q1, q4, q4
+; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v8f32:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    .pad #40
+; LE-I64-NEXT:    sub sp, sp, #40
+; LE-I64-NEXT:    vorr q6, q1, q1
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    vorr q7, q0, q0
+; LE-I64-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-I64-NEXT:    vmov.f32 s0, s27
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s24
+; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s25
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vorr q6, q7, q7
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    mov r10, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    vmov.f32 s0, s26
+; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s27
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s24
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.f32 s0, s1
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.f32 s0, s2
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    vmov.32 d13[1], r6
+; LE-I64-NEXT:    vmov.32 d15[1], r4
+; LE-I64-NEXT:    vmov.32 d11[1], r10
+; LE-I64-NEXT:    vmov.32 d9[1], r8
+; LE-I64-NEXT:    vmov.32 d12[1], r5
+; LE-I64-NEXT:    vmov.32 d14[1], r7
+; LE-I64-NEXT:    vorr q0, q6, q6
+; LE-I64-NEXT:    vmov.32 d10[1], r9
+; LE-I64-NEXT:    vorr q1, q7, q7
+; LE-I64-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEXT:    vorr q2, q5, q5
+; LE-I64-NEXT:    vorr q3, q4, q4
+; LE-I64-NEXT:    add sp, sp, #40
+; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v8f32:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r11, lr}
+; LE-I32-NEON-NEXT:    push {r11, lr}
+; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    vorr q5, q1, q1
+; LE-I32-NEON-NEXT:    vorr q7, q0, q0
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s20
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s22
+; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s30
+; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s28
+; LE-I32-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s31
+; LE-I32-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s29
+; LE-I32-NEON-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s23
+; LE-I32-NEON-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s21
+; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT:    vorr q0, q6, q6
+; LE-I32-NEON-NEXT:    vorr q1, q4, q4
+; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v8f32:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    .pad #40
+; LE-I64-NEON-NEXT:    sub sp, sp, #40
+; LE-I64-NEON-NEXT:    vorr q6, q1, q1
+; LE-I64-NEON-NEXT:    add lr, sp, #24
+; LE-I64-NEON-NEXT:    vorr q7, q0, q0
+; LE-I64-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s27
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s24
+; LE-I64-NEON-NEXT:    mov r8, r1
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s25
+; LE-I64-NEON-NEXT:    mov r9, r1
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vorr q6, q7, q7
+; LE-I64-NEON-NEXT:    add lr, sp, #8
+; LE-I64-NEON-NEXT:    mov r10, r1
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s26
+; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s27
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s24
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    add lr, sp, #8
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s1
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    add lr, sp, #24
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s2
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r6
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r4
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r10
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r8
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r5
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r7
+; LE-I64-NEON-NEXT:    vorr q0, q6, q6
+; LE-I64-NEON-NEXT:    vmov.32 d10[1], r9
+; LE-I64-NEON-NEXT:    vorr q1, q7, q7
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEON-NEXT:    vorr q2, q5, q5
+; LE-I64-NEON-NEXT:    vorr q3, q4, q4
+; LE-I64-NEON-NEXT:    add sp, sp, #40
+; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I32-LABEL: lrint_v8f32:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r11, lr}
+; BE-I32-NEXT:    push {r11, lr}
+; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    vrev64.32 q4, q1
+; BE-I32-NEXT:    vrev64.32 q5, q0
+; BE-I32-NEXT:    vmov.f32 s0, s16
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s20
+; BE-I32-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s18
+; BE-I32-NEXT:    vmov.32 d14[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s22
+; BE-I32-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s19
+; BE-I32-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s23
+; BE-I32-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s21
+; BE-I32-NEXT:    vmov.32 d15[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s17
+; BE-I32-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEXT:    vrev64.32 q0, q7
+; BE-I32-NEXT:    vrev64.32 q1, q6
+; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v8f32:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    .pad #32
+; BE-I64-NEXT:    sub sp, sp, #32
+; BE-I64-NEXT:    vorr q4, q1, q1
+; BE-I64-NEXT:    add lr, sp, #8
+; BE-I64-NEXT:    vorr q5, q0, q0
+; BE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I64-NEXT:    vrev64.32 d12, d8
+; BE-I64-NEXT:    vmov.f32 s0, s25
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s24
+; BE-I64-NEXT:    mov r8, r1
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vrev64.32 d0, d11
+; BE-I64-NEXT:    mov r9, r1
+; BE-I64-NEXT:    vrev64.32 d8, d9
+; BE-I64-NEXT:    vorr d9, d0, d0
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    mov r10, r1
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    add lr, sp, #8
+; BE-I64-NEXT:    vmov.f32 s0, s19
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    vrev64.32 d8, d16
+; BE-I64-NEXT:    vstr d8, [sp, #8] @ 8-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vldr d0, [sp, #8] @ 8-byte Reload
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    vmov.f32 s0, s1
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vldr d0, [sp, #24] @ 8-byte Reload
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    @ kill: def $s0 killed $s0 killed $d0
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    vmov.32 d9[1], r6
+; BE-I64-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEXT:    vmov.32 d15[1], r8
+; BE-I64-NEXT:    vmov.32 d13[1], r7
+; BE-I64-NEXT:    vmov.32 d8[1], r5
+; BE-I64-NEXT:    vmov.32 d10[1], r10
+; BE-I64-NEXT:    vmov.32 d14[1], r9
+; BE-I64-NEXT:    vmov.32 d12[1], r1
+; BE-I64-NEXT:    vrev64.32 q0, q4
+; BE-I64-NEXT:    vrev64.32 q1, q5
+; BE-I64-NEXT:    vrev64.32 q2, q7
+; BE-I64-NEXT:    vrev64.32 q3, q6
+; BE-I64-NEXT:    add sp, sp, #32
+; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v8f32:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r11, lr}
+; BE-I32-NEON-NEXT:    push {r11, lr}
+; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    vrev64.32 q4, q1
+; BE-I32-NEON-NEXT:    vrev64.32 q5, q0
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s20
+; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s18
+; BE-I32-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s22
+; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s19
+; BE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s23
+; BE-I32-NEON-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s21
+; BE-I32-NEON-NEXT:    vmov.32 d15[1], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I32-NEON-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEON-NEXT:    vrev64.32 q0, q7
+; BE-I32-NEON-NEXT:    vrev64.32 q1, q6
+; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v8f32:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    .pad #32
+; BE-I64-NEON-NEXT:    sub sp, sp, #32
+; BE-I64-NEON-NEXT:    vorr q4, q1, q1
+; BE-I64-NEON-NEXT:    add lr, sp, #8
+; BE-I64-NEON-NEXT:    vorr q5, q0, q0
+; BE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    vrev64.32 d12, d8
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s25
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s24
+; BE-I64-NEON-NEXT:    mov r8, r1
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vrev64.32 d0, d11
+; BE-I64-NEON-NEXT:    mov r9, r1
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d9
+; BE-I64-NEON-NEXT:    vorr d9, d0, d0
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    mov r10, r1
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    add lr, sp, #8
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s19
+; BE-I64-NEON-NEXT:    mov r7, r1
+; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d16
+; BE-I64-NEON-NEXT:    vstr d8, [sp, #8] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vldr d0, [sp, #8] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s1
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vldr d0, [sp, #24] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    @ kill: def $s0 killed $s0 killed $d0
+; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d9[1], r6
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r8
+; BE-I64-NEON-NEXT:    vmov.32 d13[1], r7
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r5
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r10
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r9
+; BE-I64-NEON-NEXT:    vmov.32 d12[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 q0, q4
+; BE-I64-NEON-NEXT:    vrev64.32 q1, q5
+; BE-I64-NEON-NEXT:    vrev64.32 q2, q7
+; BE-I64-NEON-NEXT:    vrev64.32 q3, q6
+; BE-I64-NEON-NEXT:    add sp, sp, #32
+; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x) ; iXLen is a RUN-line substitution — presumably i32/i64 matching the I32/I64 check prefixes; confirm against the RUN lines
+  ret <8 x iXLen> %a
+}
+declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>) ; expanded per element as libcalls to lrintf in the checks above
+
+
+define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
+; LE-I32-LABEL: lrint_v16f32:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: .pad #80
+; LE-I32-NEXT: sub sp, sp, #80
+; LE-I32-NEXT: vorr q5, q3, q3
+; LE-I32-NEXT: vstmia sp, {d0, d1} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #32
+; LE-I32-NEXT: vorr q6, q2, q2
+; LE-I32-NEXT: vorr q7, q1, q1
+; LE-I32-NEXT: vmov.f32 s0, s20
+; LE-I32-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s22
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s24
+; LE-I32-NEXT: add lr, sp, #48
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #16
+; LE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s26
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: add lr, sp, #64
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEXT: vorr q4, q7, q7
+; LE-I32-NEXT: vmov.f32 s0, s16
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s18
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload
+; LE-I32-NEXT: vmov.f32 s0, s26
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s24
+; LE-I32-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s27
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s25
+; LE-I32-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s19
+; LE-I32-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s17
+; LE-I32-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: add lr, sp, #16
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEXT: vmov.f32 s0, s27
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s25
+; LE-I32-NEXT: add lr, sp, #64
+; LE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: add lr, sp, #64
+; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #32
+; LE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEXT: vmov.f32 s0, s19
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s17
+; LE-I32-NEXT: add lr, sp, #48
+; LE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: add lr, sp, #64
+; LE-I32-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEXT: vorr q0, q7, q7
+; LE-I32-NEXT: vldmia lr, {d4, d5} @ 16-byte Reload
+; LE-I32-NEXT: vorr q1, q5, q5
+; LE-I32-NEXT: vorr q3, q6, q6
+; LE-I32-NEXT: add sp, sp, #80
+; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v16f32:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: .pad #4
+; LE-I64-NEXT: sub sp, sp, #4
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: .pad #160
+; LE-I64-NEXT: sub sp, sp, #160
+; LE-I64-NEXT: add lr, sp, #112
+; LE-I64-NEXT: vorr q5, q3, q3
+; LE-I64-NEXT: vorr q6, q0, q0
+; LE-I64-NEXT: mov r4, r0
+; LE-I64-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #48
+; LE-I64-NEXT: vorr q7, q1, q1
+; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEXT: vmov.f32 s0, s23
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s24
+; LE-I64-NEXT: add lr, sp, #144
+; LE-I64-NEXT: vmov.32 d17[0], r0
+; LE-I64-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s25
+; LE-I64-NEXT: str r1, [sp, #84] @ 4-byte Spill
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s28
+; LE-I64-NEXT: add lr, sp, #128
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s29
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s30
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s31
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #112
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I64-NEXT: vmov.f32 s0, s29
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s22
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: vmov.32 d17[0], r0
+; LE-I64-NEXT: mov r11, r1
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: vmov.32 d13[1], r7
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #144
+; LE-I64-NEXT: vmov.f32 s0, s21
+; LE-I64-NEXT: vmov.32 d12[1], r5
+; LE-I64-NEXT: str r1, [sp, #40] @ 4-byte Spill
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d16[0], r0
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #88
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s20
+; LE-I64-NEXT: mov r10, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: vmov.32 d9[1], r6
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s31
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vmov.32 d8[1], r9
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #64
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #128
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #48
+; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT: vmov.f32 s0, s27
+; LE-I64-NEXT: vmov.32 d11[1], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s26
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #84] @ 4-byte Reload
+; LE-I64-NEXT: add lr, sp, #128
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d10[1], r0
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: add lr, sp, #144
+; LE-I64-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d17[1], r0
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #112
+; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT: vmov.f32 s0, s20
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: vmov.f32 s0, s22
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d16[0], r0
+; LE-I64-NEXT: vmov.32 d17[1], r11
+; LE-I64-NEXT: vorr q6, q8, q8
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #144
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #128
+; LE-I64-NEXT: vmov.32 d9[1], r9
+; LE-I64-NEXT: vmov.32 d12[1], r6
+; LE-I64-NEXT: vmov.32 d19[1], r10
+; LE-I64-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEXT: vmov.32 d16[1], r0
+; LE-I64-NEXT: add r0, r4, #64
+; LE-I64-NEXT: vmov.32 d18[1], r8
+; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d8, d9}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEXT: vmov.32 d15[1], r7
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #64
+; LE-I64-NEXT: vmov.32 d14[1], r5
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r4:128]!
+; LE-I64-NEXT: vst1.64 {d14, d15}, [r4:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #88
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r4:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r4:128]
+; LE-I64-NEXT: add sp, sp, #160
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: add sp, sp, #4
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v16f32:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r11, lr}
+; LE-I32-NEON-NEXT: push {r11, lr}
+; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: .pad #80
+; LE-I32-NEON-NEXT: sub sp, sp, #80
+; LE-I32-NEON-NEXT: vorr q5, q3, q3
+; LE-I32-NEON-NEXT: vstmia sp, {d0, d1} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #32
+; LE-I32-NEON-NEXT: vorr q6, q2, q2
+; LE-I32-NEON-NEXT: vorr q7, q1, q1
+; LE-I32-NEON-NEXT: vmov.f32 s0, s20
+; LE-I32-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s22
+; LE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s24
+; LE-I32-NEON-NEXT: add lr, sp, #48
+; LE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #16
+; LE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s26
+; LE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: add lr, sp, #64
+; LE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEON-NEXT: vorr q4, q7, q7
+; LE-I32-NEON-NEXT: vmov.f32 s0, s16
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s18
+; LE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vmov.f32 s0, s26
+; LE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s24
+; LE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s27
+; LE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s25
+; LE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s19
+; LE-I32-NEON-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s17
+; LE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: add lr, sp, #16
+; LE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vmov.f32 s0, s27
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s25
+; LE-I32-NEON-NEXT: add lr, sp, #64
+; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT: add lr, sp, #64
+; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #32
+; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vmov.f32 s0, s19
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s17
+; LE-I32-NEON-NEXT: add lr, sp, #48
+; LE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: add lr, sp, #64
+; LE-I32-NEON-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT: vorr q0, q7, q7
+; LE-I32-NEON-NEXT: vldmia lr, {d4, d5} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr q1, q5, q5
+; LE-I32-NEON-NEXT: vorr q3, q6, q6
+; LE-I32-NEON-NEXT: add sp, sp, #80
+; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v16f32:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: .pad #4
+; LE-I64-NEON-NEXT: sub sp, sp, #4
+; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: .pad #160
+; LE-I64-NEON-NEXT: sub sp, sp, #160
+; LE-I64-NEON-NEXT: add lr, sp, #112
+; LE-I64-NEON-NEXT: vorr q5, q3, q3
+; LE-I64-NEON-NEXT: vorr q6, q0, q0
+; LE-I64-NEON-NEXT: mov r4, r0
+; LE-I64-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #48
+; LE-I64-NEON-NEXT: vorr q7, q1, q1
+; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEON-NEXT: vmov.f32 s0, s23
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s24
+; LE-I64-NEON-NEXT: add lr, sp, #144
+; LE-I64-NEON-NEXT: vmov.32 d17[0], r0
+; LE-I64-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s25
+; LE-I64-NEON-NEXT: str r1, [sp, #84] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s28
+; LE-I64-NEON-NEXT: add lr, sp, #128
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s29
+; LE-I64-NEON-NEXT: mov r9, r1
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s30
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s31
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: add lr, sp, #112
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.f32 s0, s29
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s22
+; LE-I64-NEON-NEXT: add lr, sp, #24
+; LE-I64-NEON-NEXT: vmov.32 d17[0], r0
+; LE-I64-NEON-NEXT: mov r11, r1
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r7
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: add lr, sp, #144
+; LE-I64-NEON-NEXT: vmov.f32 s0, s21
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r5
+; LE-I64-NEON-NEXT: str r1, [sp, #40] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #88
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s20
+; LE-I64-NEON-NEXT: mov r10, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r6
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s31
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #8
+; LE-I64-NEON-NEXT: mov r8, r1
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r9
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #64
+; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: add lr, sp, #128
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; LE-I64-NEON-NEXT: mov r9, r1
+; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #48
+; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.f32 s0, s27
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s26
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #84] @ 4-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #128
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r0
+; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #144
+; LE-I64-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d17[1], r0
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #112
+; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.f32 s0, s20
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: add lr, sp, #24
+; LE-I64-NEON-NEXT: vmov.f32 s0, s22
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; LE-I64-NEON-NEXT: vmov.32 d17[1], r11
+; LE-I64-NEON-NEXT: vorr q6, q8, q8
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: add lr, sp, #144
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #8
+; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #128
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r9
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r6
+; LE-I64-NEON-NEXT: vmov.32 d19[1], r10
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEON-NEXT: vmov.32 d16[1], r0
+; LE-I64-NEON-NEXT: add r0, r4, #64
+; LE-I64-NEON-NEXT: vmov.32 d18[1], r8
+; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r7
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #64
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r5
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r4:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #88
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]
+; LE-I64-NEON-NEXT: add sp, sp, #160
+; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: add sp, sp, #4
+; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v16f32:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: .pad #96
+; BE-I32-NEXT: sub sp, sp, #96
+; BE-I32-NEXT: vrev64.32 q3, q3
+; BE-I32-NEXT: add lr, sp, #64
+; BE-I32-NEXT: vrev64.32 q4, q0
+; BE-I32-NEXT: vmov.f32 s0, s12
+; BE-I32-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #32
+; BE-I32-NEXT: vrev64.32 q5, q1
+; BE-I32-NEXT: vrev64.32 q7, q2
+; BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s16
+; BE-I32-NEXT: vmov.32 d16[0], r0
+; BE-I32-NEXT: add lr, sp, #80
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s18
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s20
+; BE-I32-NEXT: add lr, sp, #48
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #16
+; BE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s22
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s28
+; BE-I32-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEXT: vstmia sp, {d8, d9} @ 16-byte Spill
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: add lr, sp, #64
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEXT: vmov.f32 s0, s22
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s30
+; BE-I32-NEXT: add lr, sp, #80
+; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s23
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s31
+; BE-I32-NEXT: add lr, sp, #80
+; BE-I32-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s29
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: add lr, sp, #16
+; BE-I32-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEXT: vmov.f32 s0, s19
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s17
+; BE-I32-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: add lr, sp, #32
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEXT: vmov.f32 s0, s19
+; BE-I32-NEXT: vorr q7, q5, q5
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s17
+; BE-I32-NEXT: add lr, sp, #48
+; BE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: add lr, sp, #64
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEXT: vmov.f32 s0, s1
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: add lr, sp, #80
+; BE-I32-NEXT: vrev64.32 q0, q5
+; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT: vrev64.32 q1, q7
+; BE-I32-NEXT: vmov.32 d16[1], r0
+; BE-I32-NEXT: vrev64.32 q2, q6
+; BE-I32-NEXT: vrev64.32 q3, q8
+; BE-I32-NEXT: add sp, sp, #96
+; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v16f32:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: .pad #4
+; BE-I64-NEXT: sub sp, sp, #4
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: .pad #144
+; BE-I64-NEXT: sub sp, sp, #144
+; BE-I64-NEXT: vorr q6, q3, q3
+; BE-I64-NEXT: add lr, sp, #112
+; BE-I64-NEXT: vorr q7, q0, q0
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #96
+; BE-I64-NEXT: vrev64.32 d8, d13
+; BE-I64-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: str r1, [sp, #88] @ 4-byte Spill
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vrev64.32 d8, d14
+; BE-I64-NEXT: add lr, sp, #128
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: str r1, [sp, #92] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: vrev64.32 d9, d12
+; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT: vstr d9, [sp, #64] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s19
+; BE-I64-NEXT: mov r9, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: str r1, [sp, #84] @ 4-byte Spill
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: vrev64.32 d9, d15
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s18
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s19
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vldr d0, [sp, #64] @ 8-byte Reload
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: @ kill: def $s0 killed $s0 killed $d0
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: add lr, sp, #40
+; BE-I64-NEXT: str r1, [sp, #60] @ 4-byte Spill
+; BE-I64-NEXT: vmov.32 d15[1], r7
+; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #96
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: vrev64.32 d8, d16
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: vmov.32 d14[1], r5
+; BE-I64-NEXT: add lr, sp, #64
+; BE-I64-NEXT: mov r10, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: add lr, sp, #24
+; BE-I64-NEXT: mov r11, r1
+; BE-I64-NEXT: vmov.32 d13[1], r6
+; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #96
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: vrev64.32 d8, d17
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: vmov.32 d12[1], r9
+; BE-I64-NEXT: add lr, sp, #96
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: add lr, sp, #8
+; BE-I64-NEXT: ldr r0, [sp, #88] @ 4-byte Reload
+; BE-I64-NEXT: mov r9, r1
+; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #112
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #128
+; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT: vrev64.32 d8, d16
+; BE-I64-NEXT: vmov.32 d11[1], r0
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #92] @ 4-byte Reload
+; BE-I64-NEXT: add lr, sp, #128
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d10[1], r0
+; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: add lr, sp, #112
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #84] @ 4-byte Reload
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #40
+; BE-I64-NEXT: vrev64.32 d8, d17
+; BE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: vmov.32 d13[1], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #60] @ 4-byte Reload
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d12[1], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: add lr, sp, #24
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: add r0, r4, #64
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #8
+; BE-I64-NEXT: vmov.32 d17[1], r10
+; BE-I64-NEXT: vmov.32 d16[1], r11
+; BE-I64-NEXT: vorr q12, q8, q8
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #128
+; BE-I64-NEXT: vmov.32 d15[1], r7
+; BE-I64-NEXT: vmov.32 d11[1], r6
+; BE-I64-NEXT: vmov.32 d14[1], r5
+; BE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #96
+; BE-I64-NEXT: vmov.32 d10[1], r1
+; BE-I64-NEXT: vmov.32 d17[1], r8
+; BE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #64
+; BE-I64-NEXT: vmov.32 d16[1], r9
+; BE-I64-NEXT: vrev64.32 q14, q7
+; BE-I64-NEXT: vorr q13, q8, q8
+; BE-I64-NEXT: vrev64.32 q15, q5
+; BE-I64-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-I64-NEXT: vrev64.32 q8, q6
+; BE-I64-NEXT: vst1.64 {d28, d29}, [r0:128]!
+; BE-I64-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEXT: vrev64.32 q9, q9
+; BE-I64-NEXT: vrev64.32 q10, q10
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT: vrev64.32 q11, q11
+; BE-I64-NEXT: vrev64.32 q12, q12
+; BE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]
+; BE-I64-NEXT: vst1.64 {d20, d21}, [r4:128]!
+; BE-I64-NEXT: vst1.64 {d22, d23}, [r4:128]!
+; BE-I64-NEXT: vrev64.32 q13, q13
+; BE-I64-NEXT: vst1.64 {d24, d25}, [r4:128]!
+; BE-I64-NEXT: vst1.64 {d26, d27}, [r4:128]
+; BE-I64-NEXT: add sp, sp, #144
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: add sp, sp, #4
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v16f32:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r11, lr}
+; BE-I32-NEON-NEXT: push {r11, lr}
+; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: .pad #96
+; BE-I32-NEON-NEXT: sub sp, sp, #96
+; BE-I32-NEON-NEXT: vrev64.32 q3, q3
+; BE-I32-NEON-NEXT: add lr, sp, #64
+; BE-I32-NEON-NEXT: vrev64.32 q4, q0
+; BE-I32-NEON-NEXT: vmov.f32 s0, s12
+; BE-I32-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #32
+; BE-I32-NEON-NEXT: vrev64.32 q5, q1
+; BE-I32-NEON-NEXT: vrev64.32 q7, q2
+; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s16
+; BE-I32-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I32-NEON-NEXT: add lr, sp, #80
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s18
+; BE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s20
+; BE-I32-NEON-NEXT: add lr, sp, #48
+; BE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #16
+; BE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s22
+; BE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s28
+; BE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT: vstmia sp, {d8, d9} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: add lr, sp, #64
+; BE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vmov.f32 s0, s22
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s30
+; BE-I32-NEON-NEXT: add lr, sp, #80
+; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s23
+; BE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s31
+; BE-I32-NEON-NEXT: add lr, sp, #80
+; BE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s29
+; BE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: add lr, sp, #16
+; BE-I32-NEON-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vmov.f32 s0, s19
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s17
+; BE-I32-NEON-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: add lr, sp, #32
+; BE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vmov.f32 s0, s19
+; BE-I32-NEON-NEXT: vorr q7, q5, q5
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s17
+; BE-I32-NEON-NEXT: add lr, sp, #48
+; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: add lr, sp, #64
+; BE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vmov.f32 s0, s1
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: add lr, sp, #80
+; BE-I32-NEON-NEXT: vrev64.32 q0, q5
+; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vrev64.32 q1, q7
+; BE-I32-NEON-NEXT: vmov.32 d16[1], r0
+; BE-I32-NEON-NEXT: vrev64.32 q2, q6
+; BE-I32-NEON-NEXT: vrev64.32 q3, q8
+; BE-I32-NEON-NEXT: add sp, sp, #96
+; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v16f32:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: .pad #4
+; BE-I64-NEON-NEXT: sub sp, sp, #4
+; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: .pad #144
+; BE-I64-NEON-NEXT: sub sp, sp, #144
+; BE-I64-NEON-NEXT: vorr q6, q3, q3
+; BE-I64-NEON-NEXT: add lr, sp, #112
+; BE-I64-NEON-NEXT: vorr q7, q0, q0
+; BE-I64-NEON-NEXT: mov r4, r0
+; BE-I64-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #96
+; BE-I64-NEON-NEXT: vrev64.32 d8, d13
+; BE-I64-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: str r1, [sp, #88] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vrev64.32 d8, d14
+; BE-I64-NEON-NEXT: add lr, sp, #128
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: str r1, [sp, #92] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: vrev64.32 d9, d12
+; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT: vstr d9, [sp, #64] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s19
+; BE-I64-NEON-NEXT: mov r9, r1
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: str r1, [sp, #84] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: vrev64.32 d9, d15
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s18
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s19
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vldr d0, [sp, #64] @ 8-byte Reload
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: @ kill: def $s0 killed $s0 killed $d0
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: add lr, sp, #40
+; BE-I64-NEON-NEXT: str r1, [sp, #60] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r7
+; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #96
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d8, d16
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r5
+; BE-I64-NEON-NEXT: add lr, sp, #64
+; BE-I64-NEON-NEXT: mov r10, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: add lr, sp, #24
+; BE-I64-NEON-NEXT: mov r11, r1
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r6
+; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #96
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d8, d17
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r9
+; BE-I64-NEON-NEXT: add lr, sp, #96
+; BE-I64-NEON-NEXT: mov r8, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: add lr, sp, #8
+; BE-I64-NEON-NEXT: ldr r0, [sp, #88] @ 4-byte Reload
+; BE-I64-NEON-NEXT: mov r9, r1
+; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #112
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #128
+; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d8, d16
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #92] @ 4-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #128
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: add lr, sp, #112
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #84] @ 4-byte Reload
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #40
+; BE-I64-NEON-NEXT: vrev64.32 d8, d17
+; BE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #60] @ 4-byte Reload
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: add lr, sp, #24
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: add r0, r4, #64
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #8
+; BE-I64-NEON-NEXT: vmov.32 d17[1], r10
+; BE-I64-NEON-NEXT: vmov.32 d16[1], r11
+; BE-I64-NEON-NEXT: vorr q12, q8, q8
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #128
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r7
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r6
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r5
+; BE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #96
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r1
+; BE-I64-NEON-NEXT: vmov.32 d17[1], r8
+; BE-I64-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #64
+; BE-I64-NEON-NEXT: vmov.32 d16[1], r9
+; BE-I64-NEON-NEXT: vrev64.32 q14, q7
+; BE-I64-NEON-NEXT: vorr q13, q8, q8
+; BE-I64-NEON-NEXT: vrev64.32 q15, q5
+; BE-I64-NEON-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 q8, q6
+; BE-I64-NEON-NEXT: vst1.64 {d28, d29}, [r0:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEON-NEXT: vrev64.32 q9, q9
+; BE-I64-NEON-NEXT: vrev64.32 q10, q10
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT: vrev64.32 q11, q11
+; BE-I64-NEON-NEXT: vrev64.32 q12, q12
+; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]
+; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r4:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d22, d23}, [r4:128]!
+; BE-I64-NEON-NEXT: vrev64.32 q13, q13
+; BE-I64-NEON-NEXT: vst1.64 {d24, d25}, [r4:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d26, d27}, [r4:128]
+; BE-I64-NEON-NEXT: add sp, sp, #144
+; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: add sp, sp, #4
+; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x)
+ ret <16 x iXLen> %a
+}
+declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)
+
+define <32 x iXLen> @lrint_v32f32(<32 x float> %x) {
+; LE-I32-LABEL: lrint_v32f32:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r6, lr}
+; LE-I32-NEXT: push {r4, r5, r6, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: .pad #144
+; LE-I32-NEXT: sub sp, sp, #144
+; LE-I32-NEXT: add lr, sp, #48
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: add r0, sp, #224
+; LE-I32-NEXT: vorr q4, q0, q0
+; LE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #16
+; LE-I32-NEXT: vorr q6, q3, q3
+; LE-I32-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I32-NEXT: vmov.f32 s0, s4
+; LE-I32-NEXT: add lr, sp, #80
+; LE-I32-NEXT: vorr q5, q1, q1
+; LE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT: add r0, sp, #272
+; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #64
+; LE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT: add r0, sp, #240
+; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #32
+; LE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT: vstmia sp, {d2, d3} @ 16-byte Spill
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s18
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s22
+; LE-I32-NEXT: add lr, sp, #112
+; LE-I32-NEXT: vmov.32 d17[0], r0
+; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: add lr, sp, #128
+; LE-I32-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #48
+; LE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEXT: vmov.f32 s0, s20
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s22
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: vorr q7, q5, q5
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s26
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s24
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s27
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s25
+; LE-I32-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s31
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: add lr, sp, #96
+; LE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s29
+; LE-I32-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldmia sp, {d14, d15} @ 16-byte Reload
+; LE-I32-NEXT: vmov.f32 s0, s31
+; LE-I32-NEXT: add lr, sp, #48
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: add lr, sp, #128
+; LE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEXT: add lr, sp, #16
+; LE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEXT: vmov.f32 s0, s23
+; LE-I32-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s29
+; LE-I32-NEXT: add lr, sp, #112
+; LE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s20
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: add lr, sp, #128
+; LE-I32-NEXT: add r0, sp, #256
+; LE-I32-NEXT: vld1.64 {d14, d15}, [r0]
+; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s21
+; LE-I32-NEXT: vorr q4, q6, q6
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vorr q6, q7, q7
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: add lr, sp, #112
+; LE-I32-NEXT: vstmia sp, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT: vmov.f32 s0, s24
+; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: add lr, sp, #32
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEXT: vmov.f32 s0, s18
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s16
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s19
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s26
+; LE-I32-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s17
+; LE-I32-NEXT: add lr, sp, #16
+; LE-I32-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: add lr, sp, #32
+; LE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #64
+; LE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEXT: vmov.f32 s0, s20
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: add lr, sp, #80
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEXT: vmov.f32 s0, s26
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s24
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s27
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s22
+; LE-I32-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s25
+; LE-I32-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s23
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s21
+; LE-I32-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload
+; LE-I32-NEXT: vmov.f32 s0, s27
+; LE-I32-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.f32 s0, s25
+; LE-I32-NEXT: add lr, sp, #16
+; LE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: add lr, sp, #112
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: mov r0, r4
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: add lr, sp, #128
+; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: add lr, sp, #48
+; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: add lr, sp, #96
+; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: add lr, sp, #32
+; LE-I32-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I32-NEXT: add r0, r4, #64
+; LE-I32-NEXT: vst1.32 {d8, d9}, [r0:128]!
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT: vst1.32 {d10, d11}, [r0:128]!
+; LE-I32-NEXT: vst1.64 {d14, d15}, [r0:128]
+; LE-I32-NEXT: add sp, sp, #144
+; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: pop {r4, r5, r6, pc}
+;
+; LE-I64-LABEL: lrint_v32f32:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: .pad #4
+; LE-I64-NEXT: sub sp, sp, #4
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: .pad #184
+; LE-I64-NEXT: sub sp, sp, #184
+; LE-I64-NEXT: add lr, sp, #152
+; LE-I64-NEXT: vorr q7, q3, q3
+; LE-I64-NEXT: vorr q4, q2, q2
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #88
+; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEXT: vmov.f32 s0, s3
+; LE-I64-NEXT: str r0, [sp, #68] @ 4-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s18
+; LE-I64-NEXT: add lr, sp, #168
+; LE-I64-NEXT: vmov.32 d17[0], r0
+; LE-I64-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s16
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s17
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s19
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s31
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s30
+; LE-I64-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: vmov.32 d11[1], r7
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s29
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: add lr, sp, #104
+; LE-I64-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; LE-I64-NEXT: vmov.32 d13[1], r4
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: add r0, sp, #320
+; LE-I64-NEXT: add lr, sp, #120
+; LE-I64-NEXT: mov r11, r1
+; LE-I64-NEXT: vld1.64 {d0, d1}, [r0]
+; LE-I64-NEXT: add r0, sp, #304
+; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #72
+; LE-I64-NEXT: vld1.64 {d0, d1}, [r0]
+; LE-I64-NEXT: add r0, sp, #336
+; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #32
+; LE-I64-NEXT: vld1.64 {d0, d1}, [r0]
+; LE-I64-NEXT: add r0, sp, #288
+; LE-I64-NEXT: vmov.32 d12[1], r6
+; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #48
+; LE-I64-NEXT: vld1.64 {d0, d1}, [r0]
+; LE-I64-NEXT: vmov.32 d10[1], r8
+; LE-I64-NEXT: add r8, r5, #64
+; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #152
+; LE-I64-NEXT: vst1.64 {d12, d13}, [r8:128]!
+; LE-I64-NEXT: vst1.64 {d10, d11}, [r8:128]!
+; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT: vmov.f32 s0, s27
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s28
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s26
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov.32 d11[1], r4
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: add lr, sp, #136
+; LE-I64-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
+; LE-I64-NEXT: mov r10, r1
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #168
+; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #88
+; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT: vmov.f32 s0, s26
+; LE-I64-NEXT: vmov.32 d11[1], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s25
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: add lr, sp, #168
+; LE-I64-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #104
+; LE-I64-NEXT: vorr q5, q6, q6
+; LE-I64-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d15[1], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s20
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; LE-I64-NEXT: add lr, sp, #104
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d14[1], r0
+; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: add lr, sp, #152
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vorr q7, q6, q6
+; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d9[1], r11
+; LE-I64-NEXT: vmov.f32 s0, s25
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s24
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: vmov.32 d8[1], r9
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #136
+; LE-I64-NEXT: mov r11, r1
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d16[1], r10
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #120
+; LE-I64-NEXT: vst1.64 {d8, d9}, [r8:128]!
+; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT: vmov.f32 s0, s1
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #152
+; LE-I64-NEXT: vmov.32 d17[0], r0
+; LE-I64-NEXT: mov r10, r1
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #104
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #72
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r8:128]
+; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT: vmov.f32 s0, s19
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #168
+; LE-I64-NEXT: vmov.f32 s0, s18
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d16[1], r7
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s17
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: add lr, sp, #104
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d15[1], r4
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s16
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: vmov.32 d14[1], r6
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: add lr, sp, #88
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d11[1], r5
+; LE-I64-NEXT: vmov.32 d10[1], r11
+; LE-I64-NEXT: ldr r11, [sp, #68] @ 4-byte Reload
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #16
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #32
+; LE-I64-NEXT: vst1.64 {d14, d15}, [r11:128]!
+; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT: vmov.f32 s0, s23
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #152
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #120
+; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT: @ kill: def $s0 killed $s0 killed $q0
+; LE-I64-NEXT: vmov.32 d13[1], r10
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s22
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: add lr, sp, #152
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #104
+; LE-I64-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d15[1], r8
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s21
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: add lr, sp, #72
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vmov.32 d14[1], r7
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #104
+; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s20
+; LE-I64-NEXT: add lr, sp, #88
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d13[1], r9
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: add lr, sp, #32
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: vmov.32 d12[1], r6
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #88
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #120
+; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT: vmov.f32 s0, s19
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s18
+; LE-I64-NEXT: add lr, sp, #72
+; LE-I64-NEXT: mov r10, r1
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d13[1], r4
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #152
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d16[1], r5
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #168
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #48
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT: vmov.f32 s0, s21
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s20
+; LE-I64-NEXT: vmov.32 d12[1], r8
+; LE-I64-NEXT: add lr, sp, #72
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s23
+; LE-I64-NEXT: add lr, sp, #32
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d13[1], r7
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #48
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT: vmov.f32 s0, s2
+; LE-I64-NEXT: vmov.32 d12[1], r9
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #16
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #136
+; LE-I64-NEXT: vmov.32 d11[1], r7
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #152
+; LE-I64-NEXT: vmov.32 d15[1], r10
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]
+; LE-I64-NEXT: vmov.32 d10[1], r1
+; LE-I64-NEXT: ldr r1, [sp, #68] @ 4-byte Reload
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add r0, r1, #192
+; LE-I64-NEXT: add lr, sp, #72
+; LE-I64-NEXT: vmov.32 d14[1], r4
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d14, d15}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEXT: vmov.32 d9[1], r5
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #88
+; LE-I64-NEXT: vmov.32 d8[1], r6
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEXT: add r0, r1, #128
+; LE-I64-NEXT: vst1.64 {d8, d9}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #104
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEXT: add sp, sp, #184
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: add sp, sp, #4
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v32f32:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r4, r5, r6, lr}
+; LE-I32-NEON-NEXT: push {r4, r5, r6, lr}
+; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: .pad #144
+; LE-I32-NEON-NEXT: sub sp, sp, #144
+; LE-I32-NEON-NEXT: add lr, sp, #48
+; LE-I32-NEON-NEXT: mov r4, r0
+; LE-I32-NEON-NEXT: add r0, sp, #224
+; LE-I32-NEON-NEXT: vorr q4, q0, q0
+; LE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #16
+; LE-I32-NEON-NEXT: vorr q6, q3, q3
+; LE-I32-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I32-NEON-NEXT: vmov.f32 s0, s4
+; LE-I32-NEON-NEXT: add lr, sp, #80
+; LE-I32-NEON-NEXT: vorr q5, q1, q1
+; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT: add r0, sp, #272
+; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #64
+; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT: add r0, sp, #240
+; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #32
+; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT: vstmia sp, {d2, d3} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s18
+; LE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s22
+; LE-I32-NEON-NEXT: add lr, sp, #112
+; LE-I32-NEON-NEXT: vmov.32 d17[0], r0
+; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: add lr, sp, #128
+; LE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #48
+; LE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vmov.f32 s0, s20
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s22
+; LE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT: vorr q7, q5, q5
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s26
+; LE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s24
+; LE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s27
+; LE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s25
+; LE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s31
+; LE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT: add lr, sp, #96
+; LE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s29
+; LE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vldmia sp, {d14, d15} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vmov.f32 s0, s31
+; LE-I32-NEON-NEXT: add lr, sp, #48
+; LE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: add lr, sp, #128
+; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEON-NEXT: add lr, sp, #16
+; LE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vmov.f32 s0, s23
+; LE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s29
+; LE-I32-NEON-NEXT: add lr, sp, #112
+; LE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s20
+; LE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT: add lr, sp, #128
+; LE-I32-NEON-NEXT: add r0, sp, #256
+; LE-I32-NEON-NEXT: vld1.64 {d14, d15}, [r0]
+; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s21
+; LE-I32-NEON-NEXT: vorr q4, q6, q6
+; LE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vorr q6, q7, q7
+; LE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT: add lr, sp, #112
+; LE-I32-NEON-NEXT: vstmia sp, {d14, d15} @ 16-byte Spill
+; LE-I32-NEON-NEXT: vmov.f32 s0, s24
+; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: add lr, sp, #32
+; LE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vmov.f32 s0, s18
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s16
+; LE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s19
+; LE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s26
+; LE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s17
+; LE-I32-NEON-NEXT: add lr, sp, #16
+; LE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT: add lr, sp, #32
+; LE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #64
+; LE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vmov.f32 s0, s20
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: add lr, sp, #80
+; LE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vmov.f32 s0, s26
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s24
+; LE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s27
+; LE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s22
+; LE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s25
+; LE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s23
+; LE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s21
+; LE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vmov.f32 s0, s27
+; LE-I32-NEON-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.f32 s0, s25
+; LE-I32-NEON-NEXT: add lr, sp, #16
+; LE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: add lr, sp, #112
+; LE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT: mov r0, r4
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: add lr, sp, #128
+; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: add lr, sp, #48
+; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: add lr, sp, #96
+; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: add lr, sp, #32
+; LE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I32-NEON-NEXT: add r0, r4, #64
+; LE-I32-NEON-NEXT: vst1.32 {d8, d9}, [r0:128]!
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT: vst1.32 {d10, d11}, [r0:128]!
+; LE-I32-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]
+; LE-I32-NEON-NEXT: add sp, sp, #144
+; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: pop {r4, r5, r6, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v32f32:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: .pad #4
+; LE-I64-NEON-NEXT: sub sp, sp, #4
+; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: .pad #184
+; LE-I64-NEON-NEXT: sub sp, sp, #184
+; LE-I64-NEON-NEXT: add lr, sp, #152
+; LE-I64-NEON-NEXT: vorr q7, q3, q3
+; LE-I64-NEON-NEXT: vorr q4, q2, q2
+; LE-I64-NEON-NEXT: mov r5, r0
+; LE-I64-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #88
+; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEON-NEXT: vmov.f32 s0, s3
+; LE-I64-NEON-NEXT: str r0, [sp, #68] @ 4-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s18
+; LE-I64-NEON-NEXT: add lr, sp, #168
+; LE-I64-NEON-NEXT: vmov.32 d17[0], r0
+; LE-I64-NEON-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s16
+; LE-I64-NEON-NEXT: mov r8, r1
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s17
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s19
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s31
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s30
+; LE-I64-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r7
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s29
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #104
+; LE-I64-NEON-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r4
+; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: add r0, sp, #320
+; LE-I64-NEON-NEXT: add lr, sp, #120
+; LE-I64-NEON-NEXT: mov r11, r1
+; LE-I64-NEON-NEXT: vld1.64 {d0, d1}, [r0]
+; LE-I64-NEON-NEXT: add r0, sp, #304
+; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #72
+; LE-I64-NEON-NEXT: vld1.64 {d0, d1}, [r0]
+; LE-I64-NEON-NEXT: add r0, sp, #336
+; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #32
+; LE-I64-NEON-NEXT: vld1.64 {d0, d1}, [r0]
+; LE-I64-NEON-NEXT: add r0, sp, #288
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r6
+; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #48
+; LE-I64-NEON-NEXT: vld1.64 {d0, d1}, [r0]
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r8
+; LE-I64-NEON-NEXT: add r8, r5, #64
+; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #152
+; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r8:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r8:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.f32 s0, s27
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s28
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s26
+; LE-I64-NEON-NEXT: mov r9, r1
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r4
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #136
+; LE-I64-NEON-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
+; LE-I64-NEON-NEXT: mov r10, r1
+; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #168
+; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #88
+; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.f32 s0, s26
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s25
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #168
+; LE-I64-NEON-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #104
+; LE-I64-NEON-NEXT: vorr q5, q6, q6
+; LE-I64-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s20
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #104
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r0
+; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #152
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vorr q7, q6, q6
+; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r11
+; LE-I64-NEON-NEXT: vmov.f32 s0, s25
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s24
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r9
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: add lr, sp, #136
+; LE-I64-NEON-NEXT: mov r11, r1
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d16[1], r10
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #120
+; LE-I64-NEON-NEXT: vst1.64 {d8, d9}, [r8:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.f32 s0, s1
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: add lr, sp, #152
+; LE-I64-NEON-NEXT: vmov.32 d17[0], r0
+; LE-I64-NEON-NEXT: mov r10, r1
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #104
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #72
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]
+; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.f32 s0, s19
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: add lr, sp, #168
+; LE-I64-NEON-NEXT: vmov.f32 s0, s18
+; LE-I64-NEON-NEXT: mov r8, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d16[1], r7
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s17
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #104
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r4
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s16
+; LE-I64-NEON-NEXT: mov r9, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r6
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #88
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r5
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r11
+; LE-I64-NEON-NEXT: ldr r11, [sp, #68] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #16
+; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #32
+; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r11:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.f32 s0, s23
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: add lr, sp, #152
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #120
+; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT: @ kill: def $s0 killed $s0 killed $q0
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r10
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s22
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #152
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #104
+; LE-I64-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r8
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s21
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #72
+; LE-I64-NEON-NEXT: mov r8, r1
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r7
+; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #104
+; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s20
+; LE-I64-NEON-NEXT: add lr, sp, #88
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r9
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #32
+; LE-I64-NEON-NEXT: mov r9, r1
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r6
+; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #88
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #120
+; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.f32 s0, s19
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s18
+; LE-I64-NEON-NEXT: add lr, sp, #72
+; LE-I64-NEON-NEXT: mov r10, r1
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r4
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: add lr, sp, #152
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d16[1], r5
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #168
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #48
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.f32 s0, s21
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s20
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r8
+; LE-I64-NEON-NEXT: add lr, sp, #72
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s23
+; LE-I64-NEON-NEXT: add lr, sp, #32
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r7
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: add lr, sp, #48
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.f32 s0, s2
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r9
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: add lr, sp, #16
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #136
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r7
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #152
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r10
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r1
+; LE-I64-NEON-NEXT: ldr r1, [sp, #68] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add r0, r1, #192
+; LE-I64-NEON-NEXT: add lr, sp, #72
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r4
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r5
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #88
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r6
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEON-NEXT: add r0, r1, #128
+; LE-I64-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #104
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEON-NEXT: add sp, sp, #184
+; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: add sp, sp, #4
+; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v32f32:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r6, lr}
+; BE-I32-NEXT: push {r4, r5, r6, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: .pad #144
+; BE-I32-NEXT: sub sp, sp, #144
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: add r0, sp, #256
+; BE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT: add lr, sp, #16
+; BE-I32-NEXT: add r0, sp, #272
+; BE-I32-NEXT: vrev64.32 q4, q3
+; BE-I32-NEXT: vrev64.32 q7, q1
+; BE-I32-NEXT: vrev64.32 q8, q8
+; BE-I32-NEXT: vld1.64 {d18, d19}, [r0]
+; BE-I32-NEXT: add r0, sp, #224
+; BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #80
+; BE-I32-NEXT: vrev64.32 q5, q0
+; BE-I32-NEXT: vmov.f32 s0, s28
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #64
+; BE-I32-NEXT: vrev64.32 q8, q9
+; BE-I32-NEXT: vld1.64 {d20, d21}, [r0]
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #32
+; BE-I32-NEXT: vrev64.32 q8, q10
+; BE-I32-NEXT: vrev64.32 q6, q2
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: vstmia sp, {d14, d15} @ 16-byte Spill
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s18
+; BE-I32-NEXT: vmov.32 d16[0], r0
+; BE-I32-NEXT: add lr, sp, #128
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s20
+; BE-I32-NEXT: add lr, sp, #112
+; BE-I32-NEXT: vmov.32 d17[0], r0
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #48
+; BE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s22
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s30
+; BE-I32-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s26
+; BE-I32-NEXT: add lr, sp, #128
+; BE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I32-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s24
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s27
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s25
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload
+; BE-I32-NEXT: vmov.f32 s0, s27
+; BE-I32-NEXT: add lr, sp, #96
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: add lr, sp, #48
+; BE-I32-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEXT: vmov.f32 s0, s23
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s21
+; BE-I32-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: add lr, sp, #48
+; BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #16
+; BE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEXT: vmov.f32 s0, s23
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s25
+; BE-I32-NEXT: add lr, sp, #112
+; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s20
+; BE-I32-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEXT: add r0, sp, #240
+; BE-I32-NEXT: add lr, sp, #128
+; BE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEXT: vrev64.32 q6, q8
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s21
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s24
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: add lr, sp, #112
+; BE-I32-NEXT: vorr q7, q6, q6
+; BE-I32-NEXT: vstmia sp, {d12, d13} @ 16-byte Spill
+; BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: add lr, sp, #32
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEXT: vmov.f32 s0, s18
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s16
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s19
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s30
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s17
+; BE-I32-NEXT: add lr, sp, #16
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: add lr, sp, #32
+; BE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #64
+; BE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEXT: vmov.f32 s0, s20
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: add lr, sp, #80
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEXT: vmov.f32 s0, s26
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s24
+; BE-I32-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s27
+; BE-I32-NEXT: vmov.32 d14[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s22
+; BE-I32-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s25
+; BE-I32-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s23
+; BE-I32-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.f32 s0, s21
+; BE-I32-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload
+; BE-I32-NEXT: vmov.f32 s0, s27
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: add lr, sp, #16
+; BE-I32-NEXT: vmov.f32 s0, s25
+; BE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEXT: add lr, sp, #112
+; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: vrev64.32 q8, q8
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: add lr, sp, #48
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: mov r0, r4
+; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT: add lr, sp, #128
+; BE-I32-NEXT: vrev64.32 q8, q4
+; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT: add lr, sp, #96
+; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT: add lr, sp, #112
+; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT: add lr, sp, #32
+; BE-I32-NEXT: vst1.64 {d18, d19}, [r0:128]
+; BE-I32-NEXT: add r0, r4, #64
+; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEXT: vst1.32 {d10, d11}, [r0:128]!
+; BE-I32-NEXT: vst1.32 {d14, d15}, [r0:128]!
+; BE-I32-NEXT: vst1.64 {d16, d17}, [r0:128]
+; BE-I32-NEXT: add sp, sp, #144
+; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: pop {r4, r5, r6, pc}
+;
+; BE-I64-LABEL: lrint_v32f32:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: .pad #4
+; BE-I64-NEXT: sub sp, sp, #4
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: .pad #256
+; BE-I64-NEXT: sub sp, sp, #256
+; BE-I64-NEXT: add lr, sp, #208
+; BE-I64-NEXT: str r0, [sp, #156] @ 4-byte Spill
+; BE-I64-NEXT: add r0, sp, #408
+; BE-I64-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #120
+; BE-I64-NEXT: vld1.64 {d10, d11}, [r0]
+; BE-I64-NEXT: add r0, sp, #392
+; BE-I64-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #160
+; BE-I64-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #176
+; BE-I64-NEXT: vrev64.32 d8, d10
+; BE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #136
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: vld1.64 {d12, d13}, [r0]
+; BE-I64-NEXT: add r0, sp, #360
+; BE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #192
+; BE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT: add r0, sp, #376
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #40
+; BE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: str r1, [sp, #88] @ 4-byte Spill
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vrev64.32 d9, d11
+; BE-I64-NEXT: add lr, sp, #240
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: str r1, [sp, #104] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s0, s18
+; BE-I64-NEXT: vrev64.32 d8, d13
+; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s19
+; BE-I64-NEXT: add lr, sp, #192
+; BE-I64-NEXT: str r1, [sp, #72] @ 4-byte Spill
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: vrev64.32 d10, d16
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s20
+; BE-I64-NEXT: add lr, sp, #224
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s21
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: add lr, sp, #24
+; BE-I64-NEXT: mov r9, r1
+; BE-I64-NEXT: vmov.32 d15[1], r6
+; BE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #192
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: vrev64.32 d8, d17
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: vmov.32 d14[1], r7
+; BE-I64-NEXT: add lr, sp, #56
+; BE-I64-NEXT: mov r10, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: add lr, sp, #192
+; BE-I64-NEXT: mov r11, r1
+; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #40
+; BE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #224
+; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT: vrev64.32 d8, d12
+; BE-I64-NEXT: vmov.32 d11[1], r4
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: vmov.32 d10[1], r5
+; BE-I64-NEXT: add lr, sp, #224
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vrev64.32 d8, d13
+; BE-I64-NEXT: add lr, sp, #8
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #88] @ 4-byte Reload
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #240
+; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT: vmov.32 d11[1], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #104] @ 4-byte Reload
+; BE-I64-NEXT: add lr, sp, #240
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d10[1], r0
+; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: add lr, sp, #136
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #72] @ 4-byte Reload
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #24
+; BE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEXT: vrev64.32 d8, d16
+; BE-I64-NEXT: vmov.32 d13[1], r0
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: vmov.32 d12[1], r9
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: add lr, sp, #192
+; BE-I64-NEXT: vmov.32 d15[1], r4
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #8
+; BE-I64-NEXT: vmov.32 d17[1], r10
+; BE-I64-NEXT: vmov.32 d16[1], r11
+; BE-I64-NEXT: vorr q9, q8, q8
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #192
+; BE-I64-NEXT: vmov.32 d17[1], r8
+; BE-I64-NEXT: vmov.32 d16[1], r5
+; BE-I64-NEXT: vorr q10, q8, q8
+; BE-I64-NEXT: vrev64.32 q8, q6
+; BE-I64-NEXT: vmov.32 d14[1], r6
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #240
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: vrev64.32 q8, q8
+; BE-I64-NEXT: vmov.32 d11[1], r7
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #224
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: vmov.32 d10[1], r1
+; BE-I64-NEXT: vrev64.32 q8, q8
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #56
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #136
+; BE-I64-NEXT: vrev64.32 q8, q8
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #104
+; BE-I64-NEXT: vrev64.32 q8, q9
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #88
+; BE-I64-NEXT: vrev64.32 q8, q10
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #72
+; BE-I64-NEXT: vrev64.32 q8, q7
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #208
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #56
+; BE-I64-NEXT: vrev64.32 d8, d17
+; BE-I64-NEXT: vrev64.32 q8, q5
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: add lr, sp, #120
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT: vmov.32 d13[1], r4
+; BE-I64-NEXT: vrev64.32 d8, d10
+; BE-I64-NEXT: vmov.32 d12[1], r1
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: vrev64.32 q6, q6
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: vmov.32 d15[1], r1
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: ldr r6, [sp, #156] @ 4-byte Reload
+; BE-I64-NEXT: vrev64.32 d8, d11
+; BE-I64-NEXT: add r5, r6, #64
+; BE-I64-NEXT: vmov.32 d14[1], r1
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: vrev64.32 q8, q7
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: vmov.32 d15[1], r1
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: add lr, sp, #208
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I64-NEXT: vmov.32 d14[1], r1
+; BE-I64-NEXT: vrev64.32 d8, d18
+; BE-I64-NEXT: vrev64.32 q8, q7
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: add lr, sp, #160
+; BE-I64-NEXT: vmov.32 d15[1], r4
+; BE-I64-NEXT: vmov.32 d14[1], r1
+; BE-I64-NEXT: vrev64.32 q8, q7
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT: vrev64.32 d8, d11
+; BE-I64-NEXT: vst1.64 {d12, d13}, [r5:128]
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: add lr, sp, #208
+; BE-I64-NEXT: vmov.32 d13[1], r4
+; BE-I64-NEXT: vmov.32 d12[1], r1
+; BE-I64-NEXT: vrev64.32 q8, q6
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #176
+; BE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEXT: vrev64.32 d8, d12
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: vmov.32 d15[1], r1
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: mov r5, r6
+; BE-I64-NEXT: vrev64.32 d8, d13
+; BE-I64-NEXT: vmov.32 d14[1], r1
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: vrev64.32 q8, q7
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: vmov.32 d15[1], r1
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: vrev64.32 d8, d10
+; BE-I64-NEXT: vmov.32 d14[1], r1
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: vrev64.32 q8, q7
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: add lr, sp, #208
+; BE-I64-NEXT: add r0, r6, #192
+; BE-I64-NEXT: vmov.32 d15[1], r4
+; BE-I64-NEXT: vmov.32 d14[1], r1
+; BE-I64-NEXT: vrev64.32 q8, q7
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #56
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #192
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #240
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #224
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #136
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]
+; BE-I64-NEXT: add r0, r6, #128
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #104
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #88
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #72
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]
+; BE-I64-NEXT: add sp, sp, #256
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: add sp, sp, #4
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v32f32:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r4, r5, r6, lr}
+; BE-I32-NEON-NEXT: push {r4, r5, r6, lr}
+; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: .pad #144
+; BE-I32-NEON-NEXT: sub sp, sp, #144
+; BE-I32-NEON-NEXT: mov r4, r0
+; BE-I32-NEON-NEXT: add r0, sp, #256
+; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT: add lr, sp, #16
+; BE-I32-NEON-NEXT: add r0, sp, #272
+; BE-I32-NEON-NEXT: vrev64.32 q4, q3
+; BE-I32-NEON-NEXT: vrev64.32 q7, q1
+; BE-I32-NEON-NEXT: vrev64.32 q8, q8
+; BE-I32-NEON-NEXT: vld1.64 {d18, d19}, [r0]
+; BE-I32-NEON-NEXT: add r0, sp, #224
+; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #80
+; BE-I32-NEON-NEXT: vrev64.32 q5, q0
+; BE-I32-NEON-NEXT: vmov.f32 s0, s28
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #64
+; BE-I32-NEON-NEXT: vrev64.32 q8, q9
+; BE-I32-NEON-NEXT: vld1.64 {d20, d21}, [r0]
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #32
+; BE-I32-NEON-NEXT: vrev64.32 q8, q10
+; BE-I32-NEON-NEXT: vrev64.32 q6, q2
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: vstmia sp, {d14, d15} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s18
+; BE-I32-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I32-NEON-NEXT: add lr, sp, #128
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s20
+; BE-I32-NEON-NEXT: add lr, sp, #112
+; BE-I32-NEON-NEXT: vmov.32 d17[0], r0
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #48
+; BE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s22
+; BE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s30
+; BE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s26
+; BE-I32-NEON-NEXT: add lr, sp, #128
+; BE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s24
+; BE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s27
+; BE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s25
+; BE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vmov.f32 s0, s27
+; BE-I32-NEON-NEXT: add lr, sp, #96
+; BE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: add lr, sp, #48
+; BE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vmov.f32 s0, s23
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s21
+; BE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT: add lr, sp, #48
+; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #16
+; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vmov.f32 s0, s23
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s25
+; BE-I32-NEON-NEXT: add lr, sp, #112
+; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s20
+; BE-I32-NEON-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEON-NEXT: add r0, sp, #240
+; BE-I32-NEON-NEXT: add lr, sp, #128
+; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEON-NEXT: vrev64.32 q6, q8
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s21
+; BE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s24
+; BE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT: add lr, sp, #112
+; BE-I32-NEON-NEXT: vorr q7, q6, q6
+; BE-I32-NEON-NEXT: vstmia sp, {d12, d13} @ 16-byte Spill
+; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: add lr, sp, #32
+; BE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vmov.f32 s0, s18
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s16
+; BE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s19
+; BE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s30
+; BE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s17
+; BE-I32-NEON-NEXT: add lr, sp, #16
+; BE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT: add lr, sp, #32
+; BE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #64
+; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vmov.f32 s0, s20
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: add lr, sp, #80
+; BE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vmov.f32 s0, s26
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s24
+; BE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s27
+; BE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s22
+; BE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s25
+; BE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s23
+; BE-I32-NEON-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.f32 s0, s21
+; BE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vmov.f32 s0, s27
+; BE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: add lr, sp, #16
+; BE-I32-NEON-NEXT: vmov.f32 s0, s25
+; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #112
+; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT: vrev64.32 q8, q8
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: add lr, sp, #48
+; BE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT: mov r0, r4
+; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #128
+; BE-I32-NEON-NEXT: vrev64.32 q8, q4
+; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #96
+; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #112
+; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #32
+; BE-I32-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]
+; BE-I32-NEON-NEXT: add r0, r4, #64
+; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEON-NEXT: vst1.32 {d10, d11}, [r0:128]!
+; BE-I32-NEON-NEXT: vst1.32 {d14, d15}, [r0:128]!
+; BE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; BE-I32-NEON-NEXT: add sp, sp, #144
+; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: pop {r4, r5, r6, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v32f32:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: .pad #4
+; BE-I64-NEON-NEXT: sub sp, sp, #4
+; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: .pad #256
+; BE-I64-NEON-NEXT: sub sp, sp, #256
+; BE-I64-NEON-NEXT: add lr, sp, #208
+; BE-I64-NEON-NEXT: str r0, [sp, #156] @ 4-byte Spill
+; BE-I64-NEON-NEXT: add r0, sp, #408
+; BE-I64-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #120
+; BE-I64-NEON-NEXT: vld1.64 {d10, d11}, [r0]
+; BE-I64-NEON-NEXT: add r0, sp, #392
+; BE-I64-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #160
+; BE-I64-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #176
+; BE-I64-NEON-NEXT: vrev64.32 d8, d10
+; BE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #136
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: vld1.64 {d12, d13}, [r0]
+; BE-I64-NEON-NEXT: add r0, sp, #360
+; BE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #192
+; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT: add r0, sp, #376
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #40
+; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: str r1, [sp, #88] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vrev64.32 d9, d11
+; BE-I64-NEON-NEXT: add lr, sp, #240
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: str r1, [sp, #104] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vmov.f32 s0, s18
+; BE-I64-NEON-NEXT: vrev64.32 d8, d13
+; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s19
+; BE-I64-NEON-NEXT: add lr, sp, #192
+; BE-I64-NEON-NEXT: str r1, [sp, #72] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d10, d16
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s20
+; BE-I64-NEON-NEXT: add lr, sp, #224
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s21
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: add lr, sp, #24
+; BE-I64-NEON-NEXT: mov r9, r1
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r6
+; BE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #192
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d8, d17
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r7
+; BE-I64-NEON-NEXT: add lr, sp, #56
+; BE-I64-NEON-NEXT: mov r10, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: add lr, sp, #192
+; BE-I64-NEON-NEXT: mov r11, r1
+; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #40
+; BE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #224
+; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d8, d12
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r5
+; BE-I64-NEON-NEXT: add lr, sp, #224
+; BE-I64-NEON-NEXT: mov r8, r1
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vrev64.32 d8, d13
+; BE-I64-NEON-NEXT: add lr, sp, #8
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #88] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #240
+; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #104] @ 4-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #240
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: add lr, sp, #136
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #72] @ 4-byte Reload
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #24
+; BE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d8, d16
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r0
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r9
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: add lr, sp, #192
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r4
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #8
+; BE-I64-NEON-NEXT: vmov.32 d17[1], r10
+; BE-I64-NEON-NEXT: vmov.32 d16[1], r11
+; BE-I64-NEON-NEXT: vorr q9, q8, q8
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #192
+; BE-I64-NEON-NEXT: vmov.32 d17[1], r8
+; BE-I64-NEON-NEXT: vmov.32 d16[1], r5
+; BE-I64-NEON-NEXT: vorr q10, q8, q8
+; BE-I64-NEON-NEXT: vrev64.32 q8, q6
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r6
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #240
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: vrev64.32 q8, q8
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r7
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #224
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 q8, q8
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #56
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #136
+; BE-I64-NEON-NEXT: vrev64.32 q8, q8
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #104
+; BE-I64-NEON-NEXT: vrev64.32 q8, q9
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #88
+; BE-I64-NEON-NEXT: vrev64.32 q8, q10
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #72
+; BE-I64-NEON-NEXT: vrev64.32 q8, q7
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #208
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #56
+; BE-I64-NEON-NEXT: vrev64.32 d8, d17
+; BE-I64-NEON-NEXT: vrev64.32 q8, q5
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: add lr, sp, #120
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r4
+; BE-I64-NEON-NEXT: vrev64.32 d8, d10
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r1
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: vrev64.32 q6, q6
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r1
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: ldr r6, [sp, #156] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d8, d11
+; BE-I64-NEON-NEXT: add r5, r6, #64
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r1
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: vrev64.32 q8, q7
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r1
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: add lr, sp, #208
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 d8, d18
+; BE-I64-NEON-NEXT: vrev64.32 q8, q7
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: add lr, sp, #160
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r4
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 q8, q7
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d8, d11
+; BE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r5:128]
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: add lr, sp, #208
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r4
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 q8, q6
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #176
+; BE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d8, d12
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r1
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: mov r5, r6
+; BE-I64-NEON-NEXT: vrev64.32 d8, d13
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r1
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: vrev64.32 q8, q7
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r1
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: vrev64.32 d8, d10
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r1
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: vrev64.32 q8, q7
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: add lr, sp, #208
+; BE-I64-NEON-NEXT: add r0, r6, #192
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r4
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 q8, q7
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #56
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #192
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #240
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #224
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #136
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; BE-I64-NEON-NEXT: add r0, r6, #128
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #104
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #88
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #72
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; BE-I64-NEON-NEXT: add sp, sp, #256
+; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: add sp, sp, #4
+; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float> %x)
+ ret <32 x iXLen> %a
+}
+declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float>)
+
+define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { ; v1: single lrint libcall; I64 prefixes pack the r0:r1 result into d0 (BE-I64 fixes lane order with vrev64.32)
+; LE-I32-LABEL: lrint_v1f64:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v1f64:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r11, lr}
+; LE-I64-NEXT: push {r11, lr}
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vmov.32 d0[0], r0
+; LE-I64-NEXT: vmov.32 d0[1], r1
+; LE-I64-NEXT: pop {r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v1f64:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r11, lr}
+; LE-I32-NEON-NEXT: push {r11, lr}
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v1f64:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r11, lr}
+; LE-I64-NEON-NEXT: push {r11, lr}
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vmov.32 d0[0], r0
+; LE-I64-NEON-NEXT: vmov.32 d0[1], r1
+; LE-I64-NEON-NEXT: pop {r11, pc}
+;
+; BE-I32-LABEL: lrint_v1f64:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v1f64:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r11, lr}
+; BE-I64-NEXT: push {r11, lr}
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d0, d16
+; BE-I64-NEXT: pop {r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v1f64:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r11, lr}
+; BE-I32-NEON-NEXT: push {r11, lr}
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v1f64:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r11, lr}
+; BE-I64-NEON-NEXT: push {r11, lr}
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 d0, d16
+; BE-I64-NEON-NEXT: pop {r11, pc}
+ %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x) ; iXLen presumably substituted to i32/i64 per RUN line (I32/I64 check prefixes) -- confirm against the RUN header
+ ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
+
+define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { ; v2: two lrint libcalls; I64 variants hold the first call's high word in r4 across the second call, then assemble q5 -> q0 (BE-I64 via vrev64.32)
+; LE-I32-LABEL: lrint_v2f64:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10}
+; LE-I32-NEXT: vpush {d8, d9, d10}
+; LE-I32-NEXT: vorr q4, q0, q0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d9, d9
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vorr d0, d10, d10
+; LE-I32-NEXT: vpop {d8, d9, d10}
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v2f64:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, lr}
+; LE-I64-NEXT: push {r4, lr}
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11}
+; LE-I64-NEXT: vorr q4, q0, q0
+; LE-I64-NEXT: vorr d0, d9, d9
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d8, d8
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: vmov.32 d11[1], r4
+; LE-I64-NEXT: vmov.32 d10[1], r1
+; LE-I64-NEXT: vorr q0, q5, q5
+; LE-I64-NEXT: vpop {d8, d9, d10, d11}
+; LE-I64-NEXT: pop {r4, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v2f64:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r11, lr}
+; LE-I32-NEON-NEXT: push {r11, lr}
+; LE-I32-NEON-NEXT: .vsave {d8, d9, d10}
+; LE-I32-NEON-NEXT: vpush {d8, d9, d10}
+; LE-I32-NEON-NEXT: vorr q4, q0, q0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d9, d9
+; LE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT: vorr d0, d10, d10
+; LE-I32-NEON-NEXT: vpop {d8, d9, d10}
+; LE-I32-NEON-NEXT: pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v2f64:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r4, lr}
+; LE-I64-NEON-NEXT: push {r4, lr}
+; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11}
+; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11}
+; LE-I64-NEON-NEXT: vorr q4, q0, q0
+; LE-I64-NEON-NEXT: vorr d0, d9, d9
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d8, d8
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r4
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r1
+; LE-I64-NEON-NEXT: vorr q0, q5, q5
+; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11}
+; LE-I64-NEON-NEXT: pop {r4, pc}
+;
+; BE-I32-LABEL: lrint_v2f64:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10}
+; BE-I32-NEXT: vpush {d8, d9, d10}
+; BE-I32-NEXT: vorr q4, q0, q0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d9, d9
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vrev64.32 d0, d10
+; BE-I32-NEXT: vpop {d8, d9, d10}
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v2f64:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, lr}
+; BE-I64-NEXT: push {r4, lr}
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11}
+; BE-I64-NEXT: vorr q4, q0, q0
+; BE-I64-NEXT: vorr d0, d9, d9
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d8, d8
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: vmov.32 d11[1], r4
+; BE-I64-NEXT: vmov.32 d10[1], r1
+; BE-I64-NEXT: vrev64.32 q0, q5
+; BE-I64-NEXT: vpop {d8, d9, d10, d11}
+; BE-I64-NEXT: pop {r4, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v2f64:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r11, lr}
+; BE-I32-NEON-NEXT: push {r11, lr}
+; BE-I32-NEON-NEXT: .vsave {d8, d9, d10}
+; BE-I32-NEON-NEXT: vpush {d8, d9, d10}
+; BE-I32-NEON-NEXT: vorr q4, q0, q0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d9, d9
+; BE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT: vrev64.32 d0, d10
+; BE-I32-NEON-NEXT: vpop {d8, d9, d10}
+; BE-I32-NEON-NEXT: pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v2f64:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r4, lr}
+; BE-I64-NEON-NEXT: push {r4, lr}
+; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11}
+; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11}
+; BE-I64-NEON-NEXT: vorr q4, q0, q0
+; BE-I64-NEON-NEXT: vorr d0, d9, d9
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d8, d8
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 q0, q5
+; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11}
+; BE-I64-NEON-NEXT: pop {r4, pc}
+ %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double> %x) ; iXLen presumably substituted to i32/i64 per RUN line (I32/I64 check prefixes) -- confirm against the RUN header
+ ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
+
+define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { ; v4: four lrint libcalls; I64 variants keep high words in r4-r6 across calls and return the pair in q0/q1 (BE adds vrev64.32 per q-reg)
+; LE-I32-LABEL: lrint_v4f64:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; LE-I32-NEXT: vorr q4, q1, q1
+; LE-I32-NEXT: vorr q5, q0, q0
+; LE-I32-NEXT: vorr d0, d8, d8
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d10, d10
+; LE-I32-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d9, d9
+; LE-I32-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d11, d11
+; LE-I32-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEXT: vorr q0, q6, q6
+; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v4f64:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, lr}
+; LE-I64-NEXT: push {r4, r5, r6, lr}
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vorr q5, q1, q1
+; LE-I64-NEXT: vorr q6, q0, q0
+; LE-I64-NEXT: vorr d0, d11, d11
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d12, d12
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d13, d13
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d10, d10
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEXT: vmov.32 d9[1], r4
+; LE-I64-NEXT: vmov.32 d14[1], r5
+; LE-I64-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEXT: vorr q0, q7, q7
+; LE-I64-NEXT: vorr q1, q4, q4
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: pop {r4, r5, r6, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v4f64:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r11, lr}
+; LE-I32-NEON-NEXT: push {r11, lr}
+; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
+; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; LE-I32-NEON-NEXT: vorr q4, q1, q1
+; LE-I32-NEON-NEXT: vorr q5, q0, q0
+; LE-I32-NEON-NEXT: vorr d0, d8, d8
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d10, d10
+; LE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d9, d9
+; LE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d11, d11
+; LE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT: vorr q0, q6, q6
+; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; LE-I32-NEON-NEXT: pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v4f64:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r4, r5, r6, lr}
+; LE-I64-NEON-NEXT: push {r4, r5, r6, lr}
+; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: vorr q5, q1, q1
+; LE-I64-NEON-NEXT: vorr q6, q0, q0
+; LE-I64-NEON-NEXT: vorr d0, d11, d11
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d12, d12
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d13, d13
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d10, d10
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r4
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r5
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEON-NEXT: vorr q0, q7, q7
+; LE-I64-NEON-NEXT: vorr q1, q4, q4
+; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: pop {r4, r5, r6, pc}
+;
+; BE-I32-LABEL: lrint_v4f64:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; BE-I32-NEXT: vorr q4, q1, q1
+; BE-I32-NEXT: vorr q5, q0, q0
+; BE-I32-NEXT: vorr d0, d8, d8
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d10, d10
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d9, d9
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d11, d11
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q6
+; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v4f64:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, lr}
+; BE-I64-NEXT: push {r4, r5, r6, lr}
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: vorr q4, q1, q1
+; BE-I64-NEXT: vorr q5, q0, q0
+; BE-I64-NEXT: vorr d0, d9, d9
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d10, d10
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d11, d11
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d8, d8
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: vmov.32 d15[1], r6
+; BE-I64-NEXT: vmov.32 d13[1], r4
+; BE-I64-NEXT: vmov.32 d14[1], r5
+; BE-I64-NEXT: vmov.32 d12[1], r1
+; BE-I64-NEXT: vrev64.32 q0, q7
+; BE-I64-NEXT: vrev64.32 q1, q6
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: pop {r4, r5, r6, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v4f64:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r11, lr}
+; BE-I32-NEON-NEXT: push {r11, lr}
+; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
+; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; BE-I32-NEON-NEXT: vorr q4, q1, q1
+; BE-I32-NEON-NEXT: vorr q5, q0, q0
+; BE-I32-NEON-NEXT: vorr d0, d8, d8
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d10, d10
+; BE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d9, d9
+; BE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d11, d11
+; BE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEON-NEXT: vrev64.32 q0, q6
+; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; BE-I32-NEON-NEXT: pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v4f64:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r4, r5, r6, lr}
+; BE-I64-NEON-NEXT: push {r4, r5, r6, lr}
+; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: vorr q4, q1, q1
+; BE-I64-NEON-NEXT: vorr q5, q0, q0
+; BE-I64-NEON-NEXT: vorr d0, d9, d9
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d10, d10
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d11, d11
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d8, d8
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r6
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r4
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r5
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 q0, q7
+; BE-I64-NEON-NEXT: vrev64.32 q1, q6
+; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: pop {r4, r5, r6, pc}
+ %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x) ; iXLen presumably substituted to i32/i64 per RUN line (I32/I64 check prefixes) -- confirm against the RUN header
+ ret <4 x iXLen> %a
+}
+declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>)
+
+define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
+; LE-I32-LABEL: lrint_v8f64:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: .pad #32
+; LE-I32-NEXT: sub sp, sp, #32
+; LE-I32-NEXT: vorr q5, q0, q0
+; LE-I32-NEXT: add lr, sp, #16
+; LE-I32-NEXT: vorr d0, d4, d4
+; LE-I32-NEXT: vstmia sp, {d6, d7} @ 16-byte Spill
+; LE-I32-NEXT: vorr q7, q3, q3
+; LE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I32-NEXT: vorr q6, q1, q1
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d14, d14
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d12, d12
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d10, d10
+; LE-I32-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d13, d13
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d11, d11
+; LE-I32-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d17, d17
+; LE-I32-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #16
+; LE-I32-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d17, d17
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: vorr q0, q7, q7
+; LE-I32-NEXT: vorr q1, q4, q4
+; LE-I32-NEXT: add sp, sp, #32
+; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v8f64:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: .pad #40
+; LE-I64-NEXT: sub sp, sp, #40
+; LE-I64-NEXT: vorr q4, q0, q0
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: vorr d0, d7, d7
+; LE-I64-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I64-NEXT: vorr q7, q2, q2
+; LE-I64-NEXT: vorr q6, q1, q1
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d14, d14
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: vmov.32 d17[0], r0
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d15, d15
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d12, d12
+; LE-I64-NEXT: mov r10, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d13, d13
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d8, d8
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d9, d9
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: vmov.32 d13[1], r6
+; LE-I64-NEXT: vldmia lr, {d6, d7} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d15[1], r4
+; LE-I64-NEXT: vmov.32 d11[1], r10
+; LE-I64-NEXT: vmov.32 d6[0], r0
+; LE-I64-NEXT: vmov.32 d12[1], r5
+; LE-I64-NEXT: vmov.32 d14[1], r7
+; LE-I64-NEXT: vorr q0, q6, q6
+; LE-I64-NEXT: vmov.32 d10[1], r9
+; LE-I64-NEXT: vorr q1, q7, q7
+; LE-I64-NEXT: vmov.32 d7[1], r8
+; LE-I64-NEXT: vorr q2, q5, q5
+; LE-I64-NEXT: vmov.32 d6[1], r1
+; LE-I64-NEXT: add sp, sp, #40
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v8f64:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r11, lr}
+; LE-I32-NEON-NEXT: push {r11, lr}
+; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: .pad #32
+; LE-I32-NEON-NEXT: sub sp, sp, #32
+; LE-I32-NEON-NEXT: vorr q5, q0, q0
+; LE-I32-NEON-NEXT: add lr, sp, #16
+; LE-I32-NEON-NEXT: vorr d0, d4, d4
+; LE-I32-NEON-NEXT: vstmia sp, {d6, d7} @ 16-byte Spill
+; LE-I32-NEON-NEXT: vorr q7, q3, q3
+; LE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I32-NEON-NEXT: vorr q6, q1, q1
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d14, d14
+; LE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d12, d12
+; LE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d10, d10
+; LE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d13, d13
+; LE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d11, d11
+; LE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d17, d17
+; LE-I32-NEON-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #16
+; LE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d17, d17
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT: vorr q0, q7, q7
+; LE-I32-NEON-NEXT: vorr q1, q4, q4
+; LE-I32-NEON-NEXT: add sp, sp, #32
+; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v8f64:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: .pad #40
+; LE-I64-NEON-NEXT: sub sp, sp, #40
+; LE-I64-NEON-NEXT: vorr q4, q0, q0
+; LE-I64-NEON-NEXT: add lr, sp, #24
+; LE-I64-NEON-NEXT: vorr d0, d7, d7
+; LE-I64-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I64-NEON-NEXT: vorr q7, q2, q2
+; LE-I64-NEON-NEXT: vorr q6, q1, q1
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d14, d14
+; LE-I64-NEON-NEXT: add lr, sp, #8
+; LE-I64-NEON-NEXT: vmov.32 d17[0], r0
+; LE-I64-NEON-NEXT: mov r8, r1
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d15, d15
+; LE-I64-NEON-NEXT: mov r9, r1
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d12, d12
+; LE-I64-NEON-NEXT: mov r10, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d13, d13
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d8, d8
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d9, d9
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: add lr, sp, #24
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: add lr, sp, #8
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r6
+; LE-I64-NEON-NEXT: vldmia lr, {d6, d7} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r4
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r10
+; LE-I64-NEON-NEXT: vmov.32 d6[0], r0
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r5
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r7
+; LE-I64-NEON-NEXT: vorr q0, q6, q6
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r9
+; LE-I64-NEON-NEXT: vorr q1, q7, q7
+; LE-I64-NEON-NEXT: vmov.32 d7[1], r8
+; LE-I64-NEON-NEXT: vorr q2, q5, q5
+; LE-I64-NEON-NEXT: vmov.32 d6[1], r1
+; LE-I64-NEON-NEXT: add sp, sp, #40
+; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I32-LABEL: lrint_v8f64:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: .pad #32
+; BE-I32-NEXT: sub sp, sp, #32
+; BE-I32-NEXT: vorr q5, q0, q0
+; BE-I32-NEXT: vstmia sp, {d0, d1} @ 16-byte Spill
+; BE-I32-NEXT: vorr d0, d4, d4
+; BE-I32-NEXT: add lr, sp, #16
+; BE-I32-NEXT: vorr q7, q3, q3
+; BE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I32-NEXT: vorr q6, q1, q1
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d10, d10
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d14, d14
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d12, d12
+; BE-I32-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d15, d15
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d13, d13
+; BE-I32-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d17, d17
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #16
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d17, d17
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q5
+; BE-I32-NEXT: vrev64.32 q1, q4
+; BE-I32-NEXT: add sp, sp, #32
+; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v8f64:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: .pad #40
+; BE-I64-NEXT: sub sp, sp, #40
+; BE-I64-NEXT: vorr q4, q0, q0
+; BE-I64-NEXT: add lr, sp, #24
+; BE-I64-NEXT: vorr d0, d7, d7
+; BE-I64-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-I64-NEXT: vorr q7, q2, q2
+; BE-I64-NEXT: vorr q6, q1, q1
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d14, d14
+; BE-I64-NEXT: add lr, sp, #8
+; BE-I64-NEXT: vmov.32 d17[0], r0
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d15, d15
+; BE-I64-NEXT: mov r9, r1
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d12, d12
+; BE-I64-NEXT: mov r10, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d13, d13
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d8, d8
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d9, d9
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: add lr, sp, #24
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: add lr, sp, #8
+; BE-I64-NEXT: vmov.32 d13[1], r6
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: vmov.32 d15[1], r4
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d11[1], r10
+; BE-I64-NEXT: vmov.32 d17[1], r8
+; BE-I64-NEXT: vmov.32 d12[1], r5
+; BE-I64-NEXT: vmov.32 d14[1], r7
+; BE-I64-NEXT: vmov.32 d10[1], r9
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 q0, q6
+; BE-I64-NEXT: vrev64.32 q1, q7
+; BE-I64-NEXT: vrev64.32 q2, q5
+; BE-I64-NEXT: vrev64.32 q3, q8
+; BE-I64-NEXT: add sp, sp, #40
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v8f64:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r11, lr}
+; BE-I32-NEON-NEXT: push {r11, lr}
+; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: .pad #32
+; BE-I32-NEON-NEXT: sub sp, sp, #32
+; BE-I32-NEON-NEXT: vorr q5, q0, q0
+; BE-I32-NEON-NEXT: vstmia sp, {d0, d1} @ 16-byte Spill
+; BE-I32-NEON-NEXT: vorr d0, d4, d4
+; BE-I32-NEON-NEXT: add lr, sp, #16
+; BE-I32-NEON-NEXT: vorr q7, q3, q3
+; BE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I32-NEON-NEXT: vorr q6, q1, q1
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d10, d10
+; BE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d14, d14
+; BE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d12, d12
+; BE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d15, d15
+; BE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d13, d13
+; BE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d17, d17
+; BE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #16
+; BE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d17, d17
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT: vrev64.32 q0, q5
+; BE-I32-NEON-NEXT: vrev64.32 q1, q4
+; BE-I32-NEON-NEXT: add sp, sp, #32
+; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v8f64:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: .pad #40
+; BE-I64-NEON-NEXT: sub sp, sp, #40
+; BE-I64-NEON-NEXT: vorr q4, q0, q0
+; BE-I64-NEON-NEXT: add lr, sp, #24
+; BE-I64-NEON-NEXT: vorr d0, d7, d7
+; BE-I64-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-I64-NEON-NEXT: vorr q7, q2, q2
+; BE-I64-NEON-NEXT: vorr q6, q1, q1
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d14, d14
+; BE-I64-NEON-NEXT: add lr, sp, #8
+; BE-I64-NEON-NEXT: vmov.32 d17[0], r0
+; BE-I64-NEON-NEXT: mov r8, r1
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d15, d15
+; BE-I64-NEON-NEXT: mov r9, r1
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d12, d12
+; BE-I64-NEON-NEXT: mov r10, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d13, d13
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d8, d8
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d9, d9
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: add lr, sp, #24
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: add lr, sp, #8
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r6
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r4
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r10
+; BE-I64-NEON-NEXT: vmov.32 d17[1], r8
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r5
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r7
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r9
+; BE-I64-NEON-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 q0, q6
+; BE-I64-NEON-NEXT: vrev64.32 q1, q7
+; BE-I64-NEON-NEXT: vrev64.32 q2, q5
+; BE-I64-NEON-NEXT: vrev64.32 q3, q8
+; BE-I64-NEON-NEXT: add sp, sp, #40
+; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+ %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x)
+ ret <8 x iXLen> %a
+}
+declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)
+
+define <16 x iXLen> @lrint_v16f64(<16 x double> %x) {
+; LE-I32-LABEL: lrint_v16f64:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r6, lr}
+; LE-I32-NEXT: push {r4, r5, r6, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: .pad #128
+; LE-I32-NEXT: sub sp, sp, #128
+; LE-I32-NEXT: add lr, sp, #80
+; LE-I32-NEXT: add r0, sp, #240
+; LE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT: add r0, sp, #208
+; LE-I32-NEXT: vorr q6, q0, q0
+; LE-I32-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #32
+; LE-I32-NEXT: vorr q5, q1, q1
+; LE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #16
+; LE-I32-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #64
+; LE-I32-NEXT: vorr d0, d4, d4
+; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #112
+; LE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT: add r0, sp, #224
+; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #96
+; LE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT: add r0, sp, #256
+; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #48
+; LE-I32-NEXT: vld1.64 {d14, d15}, [r0]
+; LE-I32-NEXT: vstmia sp, {d2, d3} @ 16-byte Spill
+; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d12, d12
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d10, d10
+; LE-I32-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d14, d14
+; LE-I32-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #64
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #80
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #112
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEXT: vmov.32 d15[0], r4
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d17, d17
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #96
+; LE-I32-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #16
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d17, d17
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #80
+; LE-I32-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d17, d17
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #32
+; LE-I32-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d17, d17
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #96
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d17, d17
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #112
+; LE-I32-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d17, d17
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #48
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d17, d17
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #64
+; LE-I32-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d17, d17
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEXT: vorr q0, q6, q6
+; LE-I32-NEXT: vorr q1, q4, q4
+; LE-I32-NEXT: vorr q2, q5, q5
+; LE-I32-NEXT: vorr q3, q7, q7
+; LE-I32-NEXT: add sp, sp, #128
+; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: pop {r4, r5, r6, pc}
+;
+; LE-I64-LABEL: lrint_v16f64:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: .pad #4
+; LE-I64-NEXT: sub sp, sp, #4
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: .pad #176
+; LE-I64-NEXT: sub sp, sp, #176
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: str r0, [sp, #140] @ 4-byte Spill
+; LE-I64-NEXT: add r0, sp, #312
+; LE-I64-NEXT: vorr q6, q2, q2
+; LE-I64-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #96
+; LE-I64-NEXT: vorr q7, q1, q1
+; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #144
+; LE-I64-NEXT: vorr d0, d1, d1
+; LE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT: add r0, sp, #280
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #80
+; LE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT: add r0, sp, #296
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #120
+; LE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT: add r0, sp, #328
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d14, d14
+; LE-I64-NEXT: str r1, [sp, #116] @ 4-byte Spill
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d15, d15
+; LE-I64-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d12, d12
+; LE-I64-NEXT: add lr, sp, #160
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: str r1, [sp, #72] @ 4-byte Spill
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d13, d13
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT: vorr d0, d8, d8
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d9, d9
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: add lr, sp, #96
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: mov r10, r1
+; LE-I64-NEXT: vmov.32 d13[1], r5
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT: vorr d0, d9, d9
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d8, d8
+; LE-I64-NEXT: vmov.32 d12[1], r7
+; LE-I64-NEXT: add lr, sp, #96
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: mov r11, r1
+; LE-I64-NEXT: vmov.32 d15[1], r4
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #144
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vorr d0, d17, d17
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: vmov.32 d14[1], r6
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vmov.32 d17[0], r0
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #80
+; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT: vorr d0, d11, d11
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: add lr, sp, #160
+; LE-I64-NEXT: vorr d0, d10, d10
+; LE-I64-NEXT: ldr r0, [sp, #72] @ 4-byte Reload
+; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d9[1], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; LE-I64-NEXT: add lr, sp, #160
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d8[1], r0
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #120
+; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT: vorr d0, d11, d11
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: vorr d0, d10, d10
+; LE-I64-NEXT: ldr r0, [sp, #116] @ 4-byte Reload
+; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d9[1], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: add lr, sp, #144
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEXT: vmov.32 d8[1], r10
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #160
+; LE-I64-NEXT: vmov.32 d20[0], r0
+; LE-I64-NEXT: vmov.32 d21[1], r8
+; LE-I64-NEXT: vmov.32 d20[1], r1
+; LE-I64-NEXT: ldr r1, [sp, #140] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d13[1], r5
+; LE-I64-NEXT: mov r0, r1
+; LE-I64-NEXT: vst1.64 {d8, d9}, [r0:128]!
+; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vmov.32 d14[1], r4
+; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #96
+; LE-I64-NEXT: vmov.32 d12[1], r7
+; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d17[1], r9
+; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]
+; LE-I64-NEXT: add r0, r1, #64
+; LE-I64-NEXT: vst1.64 {d14, d15}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEXT: vmov.32 d16[1], r11
+; LE-I64-NEXT: vst1.64 {d20, d21}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEXT: add sp, sp, #176
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: add sp, sp, #4
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v16f64:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r4, r5, r6, lr}
+; LE-I32-NEON-NEXT: push {r4, r5, r6, lr}
+; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: .pad #128
+; LE-I32-NEON-NEXT: sub sp, sp, #128
+; LE-I32-NEON-NEXT: add lr, sp, #80
+; LE-I32-NEON-NEXT: add r0, sp, #240
+; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT: add r0, sp, #208
+; LE-I32-NEON-NEXT: vorr q6, q0, q0
+; LE-I32-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #32
+; LE-I32-NEON-NEXT: vorr q5, q1, q1
+; LE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #16
+; LE-I32-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #64
+; LE-I32-NEON-NEXT: vorr d0, d4, d4
+; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #112
+; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT: add r0, sp, #224
+; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #96
+; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT: add r0, sp, #256
+; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #48
+; LE-I32-NEON-NEXT: vld1.64 {d14, d15}, [r0]
+; LE-I32-NEON-NEXT: vstmia sp, {d2, d3} @ 16-byte Spill
+; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d12, d12
+; LE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d10, d10
+; LE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d14, d14
+; LE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #64
+; LE-I32-NEON-NEXT: mov r4, r0
+; LE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #80
+; LE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #112
+; LE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEON-NEXT: vmov.32 d15[0], r4
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d17, d17
+; LE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #96
+; LE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #16
+; LE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d17, d17
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #80
+; LE-I32-NEON-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d17, d17
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #32
+; LE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d17, d17
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #96
+; LE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d17, d17
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #112
+; LE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d17, d17
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #48
+; LE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d17, d17
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #64
+; LE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d17, d17
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEON-NEXT: vorr q0, q6, q6
+; LE-I32-NEON-NEXT: vorr q1, q4, q4
+; LE-I32-NEON-NEXT: vorr q2, q5, q5
+; LE-I32-NEON-NEXT: vorr q3, q7, q7
+; LE-I32-NEON-NEXT: add sp, sp, #128
+; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: pop {r4, r5, r6, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v16f64:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: .pad #4
+; LE-I64-NEON-NEXT: sub sp, sp, #4
+; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: .pad #176
+; LE-I64-NEON-NEXT: sub sp, sp, #176
+; LE-I64-NEON-NEXT: add lr, sp, #40
+; LE-I64-NEON-NEXT: str r0, [sp, #140] @ 4-byte Spill
+; LE-I64-NEON-NEXT: add r0, sp, #312
+; LE-I64-NEON-NEXT: vorr q6, q2, q2
+; LE-I64-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #96
+; LE-I64-NEON-NEXT: vorr q7, q1, q1
+; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #144
+; LE-I64-NEON-NEXT: vorr d0, d1, d1
+; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT: add r0, sp, #280
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #80
+; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT: add r0, sp, #296
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #120
+; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT: add r0, sp, #328
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #56
+; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d14, d14
+; LE-I64-NEON-NEXT: str r1, [sp, #116] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d15, d15
+; LE-I64-NEON-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d12, d12
+; LE-I64-NEON-NEXT: add lr, sp, #160
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: str r1, [sp, #72] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d13, d13
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: add lr, sp, #40
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d8, d8
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d9, d9
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: add lr, sp, #96
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #40
+; LE-I64-NEON-NEXT: mov r10, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r5
+; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #56
+; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d9, d9
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d8, d8
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r7
+; LE-I64-NEON-NEXT: add lr, sp, #96
+; LE-I64-NEON-NEXT: mov r9, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #24
+; LE-I64-NEON-NEXT: mov r11, r1
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r4
+; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #144
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d17, d17
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: add lr, sp, #8
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r6
+; LE-I64-NEON-NEXT: mov r8, r1
+; LE-I64-NEON-NEXT: vmov.32 d17[0], r0
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #56
+; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #80
+; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d11, d11
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #160
+; LE-I64-NEON-NEXT: vorr d0, d10, d10
+; LE-I64-NEON-NEXT: ldr r0, [sp, #72] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #160
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r0
+; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #120
+; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d11, d11
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #40
+; LE-I64-NEON-NEXT: vorr d0, d10, d10
+; LE-I64-NEON-NEXT: ldr r0, [sp, #116] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: add lr, sp, #144
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r10
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: add lr, sp, #8
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #24
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #160
+; LE-I64-NEON-NEXT: vmov.32 d20[0], r0
+; LE-I64-NEON-NEXT: vmov.32 d21[1], r8
+; LE-I64-NEON-NEXT: vmov.32 d20[1], r1
+; LE-I64-NEON-NEXT: ldr r1, [sp, #140] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r5
+; LE-I64-NEON-NEXT: mov r0, r1
+; LE-I64-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #56
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r4
+; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #96
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r7
+; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d17[1], r9
+; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]
+; LE-I64-NEON-NEXT: add r0, r1, #64
+; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEON-NEXT: vmov.32 d16[1], r11
+; LE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEON-NEXT: add sp, sp, #176
+; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: add sp, sp, #4
+; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v16f64:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r6, lr}
+; BE-I32-NEXT: push {r4, r5, r6, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: .pad #128
+; BE-I32-NEXT: sub sp, sp, #128
+; BE-I32-NEXT: add lr, sp, #64
+; BE-I32-NEXT: add r0, sp, #240
+; BE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT: add r0, sp, #224
+; BE-I32-NEXT: vorr q6, q3, q3
+; BE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #16
+; BE-I32-NEXT: vorr q5, q1, q1
+; BE-I32-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #32
+; BE-I32-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #80
+; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #112
+; BE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT: add r0, sp, #256
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #96
+; BE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT: add r0, sp, #208
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #48
+; BE-I32-NEXT: vld1.64 {d14, d15}, [r0]
+; BE-I32-NEXT: vstmia sp, {d6, d7} @ 16-byte Spill
+; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d10, d10
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d12, d12
+; BE-I32-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d14, d14
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #80
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #64
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #112
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEXT: vmov.32 d14[0], r4
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d17, d17
+; BE-I32-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #96
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #64
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d17, d17
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #16
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d17, d17
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #32
+; BE-I32-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d17, d17
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #96
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d17, d17
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #112
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d17, d17
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #48
+; BE-I32-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d17, d17
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #80
+; BE-I32-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d17, d17
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q4
+; BE-I32-NEXT: vrev64.32 q1, q5
+; BE-I32-NEXT: vrev64.32 q2, q7
+; BE-I32-NEXT: vrev64.32 q3, q6
+; BE-I32-NEXT: add sp, sp, #128
+; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: pop {r4, r5, r6, pc}
+;
+; BE-I64-LABEL: lrint_v16f64:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: .pad #4
+; BE-I64-NEXT: sub sp, sp, #4
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: .pad #168
+; BE-I64-NEXT: sub sp, sp, #168
+; BE-I64-NEXT: add lr, sp, #64
+; BE-I64-NEXT: str r0, [sp, #132] @ 4-byte Spill
+; BE-I64-NEXT: add r0, sp, #304
+; BE-I64-NEXT: vorr q4, q3, q3
+; BE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #48
+; BE-I64-NEXT: vorr d0, d1, d1
+; BE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT: add r0, sp, #320
+; BE-I64-NEXT: vorr q6, q2, q2
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #88
+; BE-I64-NEXT: vorr q7, q1, q1
+; BE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT: add r0, sp, #272
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #112
+; BE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT: add r0, sp, #288
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #24
+; BE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d14, d14
+; BE-I64-NEXT: add lr, sp, #136
+; BE-I64-NEXT: vmov.32 d17[0], r0
+; BE-I64-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d15, d15
+; BE-I64-NEXT: str r1, [sp, #84] @ 4-byte Spill
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d12, d12
+; BE-I64-NEXT: add lr, sp, #152
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d13, d13
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d8, d8
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d9, d9
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: add lr, sp, #64
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: add lr, sp, #136
+; BE-I64-NEXT: mov r9, r1
+; BE-I64-NEXT: vmov.32 d13[1], r5
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #24
+; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEXT: vorr d0, d9, d9
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d8, d8
+; BE-I64-NEXT: vmov.32 d12[1], r7
+; BE-I64-NEXT: add lr, sp, #64
+; BE-I64-NEXT: mov r10, r1
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: add lr, sp, #8
+; BE-I64-NEXT: mov r11, r1
+; BE-I64-NEXT: vmov.32 d11[1], r4
+; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #48
+; BE-I64-NEXT: vorr q6, q5, q5
+; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEXT: vorr d0, d9, d9
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d8, d8
+; BE-I64-NEXT: vmov.32 d12[1], r6
+; BE-I64-NEXT: add lr, sp, #24
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: add lr, sp, #48
+; BE-I64-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #152
+; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #88
+; BE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEXT: vorr d0, d13, d13
+; BE-I64-NEXT: vmov.32 d9[1], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #84] @ 4-byte Reload
+; BE-I64-NEXT: vorr d0, d12, d12
+; BE-I64-NEXT: add lr, sp, #152
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d8[1], r0
+; BE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: add lr, sp, #136
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #112
+; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEXT: vorr d0, d9, d9
+; BE-I64-NEXT: vmov.32 d11[1], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d8, d8
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: vmov.32 d10[1], r9
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: add lr, sp, #8
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #48
+; BE-I64-NEXT: vmov.32 d17[1], r10
+; BE-I64-NEXT: vmov.32 d16[1], r11
+; BE-I64-NEXT: vorr q12, q8, q8
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #152
+; BE-I64-NEXT: vmov.32 d17[1], r8
+; BE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #24
+; BE-I64-NEXT: vmov.32 d13[1], r7
+; BE-I64-NEXT: vmov.32 d16[1], r6
+; BE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #64
+; BE-I64-NEXT: vorr q13, q8, q8
+; BE-I64-NEXT: vmov.32 d12[1], r1
+; BE-I64-NEXT: ldr r1, [sp, #132] @ 4-byte Reload
+; BE-I64-NEXT: vrev64.32 q8, q5
+; BE-I64-NEXT: mov r0, r1
+; BE-I64-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-I64-NEXT: vrev64.32 q9, q9
+; BE-I64-NEXT: vrev64.32 q10, q10
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; BE-I64-NEXT: vrev64.32 q11, q11
+; BE-I64-NEXT: vmov.32 d15[1], r4
+; BE-I64-NEXT: vst1.64 {d20, d21}, [r0:128]!
+; BE-I64-NEXT: vrev64.32 q15, q6
+; BE-I64-NEXT: vmov.32 d14[1], r5
+; BE-I64-NEXT: vrev64.32 q12, q12
+; BE-I64-NEXT: vst1.64 {d22, d23}, [r0:128]
+; BE-I64-NEXT: add r0, r1, #64
+; BE-I64-NEXT: vrev64.32 q13, q13
+; BE-I64-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEXT: vst1.64 {d24, d25}, [r0:128]!
+; BE-I64-NEXT: vrev64.32 q14, q7
+; BE-I64-NEXT: vst1.64 {d26, d27}, [r0:128]!
+; BE-I64-NEXT: vst1.64 {d28, d29}, [r0:128]
+; BE-I64-NEXT: add sp, sp, #168
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: add sp, sp, #4
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v16f64:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r4, r5, r6, lr}
+; BE-I32-NEON-NEXT: push {r4, r5, r6, lr}
+; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: .pad #128
+; BE-I32-NEON-NEXT: sub sp, sp, #128
+; BE-I32-NEON-NEXT: add lr, sp, #64
+; BE-I32-NEON-NEXT: add r0, sp, #240
+; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT: add r0, sp, #224
+; BE-I32-NEON-NEXT: vorr q6, q3, q3
+; BE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #16
+; BE-I32-NEON-NEXT: vorr q5, q1, q1
+; BE-I32-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #32
+; BE-I32-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #80
+; BE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #112
+; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT: add r0, sp, #256
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #96
+; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT: add r0, sp, #208
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #48
+; BE-I32-NEON-NEXT: vld1.64 {d14, d15}, [r0]
+; BE-I32-NEON-NEXT: vstmia sp, {d6, d7} @ 16-byte Spill
+; BE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d10, d10
+; BE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d12, d12
+; BE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d14, d14
+; BE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #80
+; BE-I32-NEON-NEXT: mov r4, r0
+; BE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #64
+; BE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #112
+; BE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEON-NEXT: vmov.32 d14[0], r4
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d17, d17
+; BE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #96
+; BE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #64
+; BE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d17, d17
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #16
+; BE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d17, d17
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #32
+; BE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d17, d17
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #96
+; BE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d17, d17
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #112
+; BE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d17, d17
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #48
+; BE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d17, d17
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #80
+; BE-I32-NEON-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d17, d17
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEON-NEXT: vrev64.32 q0, q4
+; BE-I32-NEON-NEXT: vrev64.32 q1, q5
+; BE-I32-NEON-NEXT: vrev64.32 q2, q7
+; BE-I32-NEON-NEXT: vrev64.32 q3, q6
+; BE-I32-NEON-NEXT: add sp, sp, #128
+; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: pop {r4, r5, r6, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v16f64:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: .pad #4
+; BE-I64-NEON-NEXT: sub sp, sp, #4
+; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: .pad #168
+; BE-I64-NEON-NEXT: sub sp, sp, #168
+; BE-I64-NEON-NEXT: add lr, sp, #64
+; BE-I64-NEON-NEXT: str r0, [sp, #132] @ 4-byte Spill
+; BE-I64-NEON-NEXT: add r0, sp, #304
+; BE-I64-NEON-NEXT: vorr q4, q3, q3
+; BE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #48
+; BE-I64-NEON-NEXT: vorr d0, d1, d1
+; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT: add r0, sp, #320
+; BE-I64-NEON-NEXT: vorr q6, q2, q2
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #88
+; BE-I64-NEON-NEXT: vorr q7, q1, q1
+; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT: add r0, sp, #272
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #112
+; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT: add r0, sp, #288
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #24
+; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d14, d14
+; BE-I64-NEON-NEXT: add lr, sp, #136
+; BE-I64-NEON-NEXT: vmov.32 d17[0], r0
+; BE-I64-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d15, d15
+; BE-I64-NEON-NEXT: str r1, [sp, #84] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d12, d12
+; BE-I64-NEON-NEXT: add lr, sp, #152
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d13, d13
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d8, d8
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d9, d9
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: add lr, sp, #64
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: add lr, sp, #136
+; BE-I64-NEON-NEXT: mov r9, r1
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r5
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #24
+; BE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vorr d0, d9, d9
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d8, d8
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r7
+; BE-I64-NEON-NEXT: add lr, sp, #64
+; BE-I64-NEON-NEXT: mov r10, r1
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: add lr, sp, #8
+; BE-I64-NEON-NEXT: mov r11, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #48
+; BE-I64-NEON-NEXT: vorr q6, q5, q5
+; BE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vorr d0, d9, d9
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d8, d8
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r6
+; BE-I64-NEON-NEXT: add lr, sp, #24
+; BE-I64-NEON-NEXT: mov r8, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: add lr, sp, #48
+; BE-I64-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #152
+; BE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #88
+; BE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vorr d0, d13, d13
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #84] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vorr d0, d12, d12
+; BE-I64-NEON-NEXT: add lr, sp, #152
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r0
+; BE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: add lr, sp, #136
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #112
+; BE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vorr d0, d9, d9
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d8, d8
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r9
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: add lr, sp, #8
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #48
+; BE-I64-NEON-NEXT: vmov.32 d17[1], r10
+; BE-I64-NEON-NEXT: vmov.32 d16[1], r11
+; BE-I64-NEON-NEXT: vorr q12, q8, q8
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #152
+; BE-I64-NEON-NEXT: vmov.32 d17[1], r8
+; BE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #24
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r7
+; BE-I64-NEON-NEXT: vmov.32 d16[1], r6
+; BE-I64-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #64
+; BE-I64-NEON-NEXT: vorr q13, q8, q8
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r1
+; BE-I64-NEON-NEXT: ldr r1, [sp, #132] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 q8, q5
+; BE-I64-NEON-NEXT: mov r0, r1
+; BE-I64-NEON-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 q9, q9
+; BE-I64-NEON-NEXT: vrev64.32 q10, q10
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; BE-I64-NEON-NEXT: vrev64.32 q11, q11
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r4
+; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]!
+; BE-I64-NEON-NEXT: vrev64.32 q15, q6
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r5
+; BE-I64-NEON-NEXT: vrev64.32 q12, q12
+; BE-I64-NEON-NEXT: vst1.64 {d22, d23}, [r0:128]
+; BE-I64-NEON-NEXT: add r0, r1, #64
+; BE-I64-NEON-NEXT: vrev64.32 q13, q13
+; BE-I64-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d24, d25}, [r0:128]!
+; BE-I64-NEON-NEXT: vrev64.32 q14, q7
+; BE-I64-NEON-NEXT: vst1.64 {d26, d27}, [r0:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d28, d29}, [r0:128]
+; BE-I64-NEON-NEXT: add sp, sp, #168
+; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: add sp, sp, #4
+; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double> %x)
+ ret <16 x iXLen> %a
+}
+declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double>)
+
+define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
+; LE-I32-LABEL: lrint_v32f64:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r6, lr}
+; LE-I32-NEXT: push {r4, r5, r6, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: .pad #160
+; LE-I32-NEXT: sub sp, sp, #160
+; LE-I32-NEXT: add lr, sp, #96
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: add r0, sp, #304
+; LE-I32-NEXT: vorr q6, q3, q3
+; LE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #64
+; LE-I32-NEXT: vorr q5, q1, q1
+; LE-I32-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #48
+; LE-I32-NEXT: vorr d0, d4, d4
+; LE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT: add r0, sp, #352
+; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #16
+; LE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT: add r0, sp, #272
+; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #112
+; LE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT: add r0, sp, #288
+; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #80
+; LE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT: add r0, sp, #336
+; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #144
+; LE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT: add r0, sp, #256
+; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #128
+; LE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT: add r0, sp, #320
+; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #32
+; LE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d12, d12
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d10, d10
+; LE-I32-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d13, d13
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d11, d11
+; LE-I32-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #96
+; LE-I32-NEXT: vorr q5, q4, q4
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d17, d17
+; LE-I32-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEXT: add lr, sp, #96
+; LE-I32-NEXT: add r0, sp, #416
+; LE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #64
+; LE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d8, d8
+; LE-I32-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #32
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I32-NEXT: vorr q6, q5, q5
+; LE-I32-NEXT: vorr d0, d14, d14
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d9, d9
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d15, d15
+; LE-I32-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEXT: add lr, sp, #64
+; LE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEXT: add r0, sp, #400
+; LE-I32-NEXT: add lr, sp, #32
+; LE-I32-NEXT: vorr q6, q5, q5
+; LE-I32-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d10, d10
+; LE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #48
+; LE-I32-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d8, d8
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d11, d11
+; LE-I32-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d9, d9
+; LE-I32-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEXT: add lr, sp, #48
+; LE-I32-NEXT: add r0, sp, #384
+; LE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #32
+; LE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d10, d10
+; LE-I32-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #16
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d8, d8
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d11, d11
+; LE-I32-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d9, d9
+; LE-I32-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEXT: add lr, sp, #32
+; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr q7, q6, q6
+; LE-I32-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d10, d10
+; LE-I32-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEXT: add r0, sp, #368
+; LE-I32-NEXT: vld1.64 {d12, d13}, [r0]
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #144
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d11, d11
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d12, d12
+; LE-I32-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #144
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d17, d17
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEXT: add r0, sp, #240
+; LE-I32-NEXT: vorr d0, d13, d13
+; LE-I32-NEXT: add lr, sp, #144
+; LE-I32-NEXT: vld1.64 {d10, d11}, [r0]
+; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT: vstmia sp, {d10, d11} @ 16-byte Spill
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d10, d10
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: add lr, sp, #16
+; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #80
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d12, d12
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #112
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d14, d14
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d13, d13
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #128
+; LE-I32-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d12, d12
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d15, d15
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vorr d0, d13, d13
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: vorr d0, d17, d17
+; LE-I32-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEXT: bl lrint
+; LE-I32-NEXT: add lr, sp, #64
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: mov r0, r4
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: add lr, sp, #96
+; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: add lr, sp, #48
+; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT: vst1.32 {d8, d9}, [r0:128]!
+; LE-I32-NEXT: vst1.64 {d10, d11}, [r0:128]
+; LE-I32-NEXT: add r0, r4, #64
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: add lr, sp, #144
+; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: add lr, sp, #16
+; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: add lr, sp, #32
+; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I32-NEXT: add sp, sp, #160
+; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: pop {r4, r5, r6, pc}
+;
+; LE-I64-LABEL: lrint_v32f64:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: .pad #4
+; LE-I64-NEXT: sub sp, sp, #4
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: .pad #208
+; LE-I64-NEXT: sub sp, sp, #208
+; LE-I64-NEXT: add lr, sp, #72
+; LE-I64-NEXT: str r0, [sp, #156] @ 4-byte Spill
+; LE-I64-NEXT: add r0, sp, #456
+; LE-I64-NEXT: vorr q4, q0, q0
+; LE-I64-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vorr d0, d7, d7
+; LE-I64-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #104
+; LE-I64-NEXT: vorr q5, q2, q2
+; LE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT: add r0, sp, #344
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #192
+; LE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT: add r0, sp, #376
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #160
+; LE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT: add r0, sp, #360
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #136
+; LE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT: add r0, sp, #440
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #176
+; LE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d10, d10
+; LE-I64-NEXT: str r1, [sp, #120] @ 4-byte Spill
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d11, d11
+; LE-I64-NEXT: mov r10, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d8, d8
+; LE-I64-NEXT: add lr, sp, #88
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: mov r11, r1
+; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d9, d9
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT: vorr d0, d10, d10
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d11, d11
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: add lr, sp, #72
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d9[1], r7
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #176
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vorr d0, d17, d17
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vmov.32 d8[1], r4
+; LE-I64-NEXT: add lr, sp, #72
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #104
+; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT: vorr d0, d9, d9
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: vorr d0, d8, d8
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d11[1], r6
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: add lr, sp, #176
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEXT: vmov.32 d10[1], r9
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: add lr, sp, #88
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #120] @ 4-byte Reload
+; LE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: vmov.32 d19[1], r0
+; LE-I64-NEXT: add r0, sp, #408
+; LE-I64-NEXT: ldr r2, [sp, #156] @ 4-byte Reload
+; LE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT: vmov.32 d13[1], r7
+; LE-I64-NEXT: mov r0, r2
+; LE-I64-NEXT: vmov.32 d12[1], r1
+; LE-I64-NEXT: add r1, sp, #488
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: vld1.64 {d16, d17}, [r1]
+; LE-I64-NEXT: add r1, sp, #472
+; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #72
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vmov.32 d21[1], r11
+; LE-I64-NEXT: vmov.32 d20[1], r10
+; LE-I64-NEXT: add r10, r2, #192
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT: vld1.64 {d16, d17}, [r1]
+; LE-I64-NEXT: add r1, sp, #392
+; LE-I64-NEXT: vmov.32 d18[1], r5
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #72
+; LE-I64-NEXT: vst1.64 {d20, d21}, [r0:128]!
+; LE-I64-NEXT: vld1.64 {d16, d17}, [r1]
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #104
+; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]
+; LE-I64-NEXT: add r0, sp, #312
+; LE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT: add r0, sp, #328
+; LE-I64-NEXT: vmov.32 d15[1], r8
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #120
+; LE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT: add r0, sp, #424
+; LE-I64-NEXT: vmov.32 d14[1], r4
+; LE-I64-NEXT: vst1.64 {d12, d13}, [r10:128]!
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #176
+; LE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT: vst1.64 {d14, d15}, [r10:128]!
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #192
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vorr d0, d17, d17
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: add lr, sp, #136
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT: vorr d0, d10, d10
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d11, d11
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: add lr, sp, #160
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT: vorr d0, d10, d10
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d11, d11
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: add lr, sp, #192
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: add lr, sp, #192
+; LE-I64-NEXT: mov r11, r1
+; LE-I64-NEXT: vmov.32 d15[1], r4
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #176
+; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT: vorr d0, d9, d9
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d8, d8
+; LE-I64-NEXT: vmov.32 d14[1], r6
+; LE-I64-NEXT: add lr, sp, #136
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: add lr, sp, #176
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d13[1], r5
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT: vorr d0, d9, d9
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d8, d8
+; LE-I64-NEXT: vmov.32 d12[1], r8
+; LE-I64-NEXT: add lr, sp, #88
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: add lr, sp, #192
+; LE-I64-NEXT: str r1, [sp, #24] @ 4-byte Spill
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT: vorr d0, d11, d11
+; LE-I64-NEXT: vmov.32 d9[1], r9
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d10, d10
+; LE-I64-NEXT: vmov.32 d8[1], r11
+; LE-I64-NEXT: add lr, sp, #192
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: add lr, sp, #160
+; LE-I64-NEXT: str r1, [sp, #40] @ 4-byte Spill
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #176
+; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT: vorr d0, d9, d9
+; LE-I64-NEXT: vmov.32 d11[1], r4
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d8, d8
+; LE-I64-NEXT: vmov.32 d10[1], r7
+; LE-I64-NEXT: add lr, sp, #176
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: mov r11, r1
+; LE-I64-NEXT: vmov.32 d15[1], r5
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #72
+; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT: vorr d0, d9, d9
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
+; LE-I64-NEXT: vorr d0, d8, d8
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: vmov.32 d14[1], r0
+; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: add lr, sp, #72
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #160
+; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #104
+; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT: vorr d0, d9, d9
+; LE-I64-NEXT: vmov.32 d13[1], r6
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; LE-I64-NEXT: vorr d0, d8, d8
+; LE-I64-NEXT: add lr, sp, #160
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d12[1], r0
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #120
+; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT: vorr d0, d9, d9
+; LE-I64-NEXT: vmov.32 d13[1], r8
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: vorr d0, d8, d8
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: vmov.32 d12[1], r11
+; LE-I64-NEXT: bl lrint
+; LE-I64-NEXT: add lr, sp, #72
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #160
+; LE-I64-NEXT: vmov.32 d17[1], r9
+; LE-I64-NEXT: vmov.32 d16[1], r7
+; LE-I64-NEXT: vst1.64 {d12, d13}, [r10:128]!
+; LE-I64-NEXT: vorr q9, q8, q8
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #136
+; LE-I64-NEXT: vmov.32 d15[1], r5
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r10:128]
+; LE-I64-NEXT: vmov.32 d14[1], r1
+; LE-I64-NEXT: ldr r1, [sp, #156] @ 4-byte Reload
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add r0, r1, #128
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vmov.32 d11[1], r6
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #176
+; LE-I64-NEXT: vmov.32 d10[1], r4
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #192
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEXT: add r0, r1, #64
+; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d14, d15}, [r0:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #88
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEXT: add sp, sp, #208
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: add sp, sp, #4
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v32f64:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r4, r5, r6, lr}
+; LE-I32-NEON-NEXT: push {r4, r5, r6, lr}
+; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: .pad #160
+; LE-I32-NEON-NEXT: sub sp, sp, #160
+; LE-I32-NEON-NEXT: add lr, sp, #96
+; LE-I32-NEON-NEXT: mov r4, r0
+; LE-I32-NEON-NEXT: add r0, sp, #304
+; LE-I32-NEON-NEXT: vorr q6, q3, q3
+; LE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #64
+; LE-I32-NEON-NEXT: vorr q5, q1, q1
+; LE-I32-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #48
+; LE-I32-NEON-NEXT: vorr d0, d4, d4
+; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT: add r0, sp, #352
+; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #16
+; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT: add r0, sp, #272
+; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #112
+; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT: add r0, sp, #288
+; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #80
+; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT: add r0, sp, #336
+; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #144
+; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT: add r0, sp, #256
+; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #128
+; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT: add r0, sp, #320
+; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #32
+; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d12, d12
+; LE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d10, d10
+; LE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d13, d13
+; LE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d11, d11
+; LE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #96
+; LE-I32-NEON-NEXT: vorr q5, q4, q4
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d17, d17
+; LE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEON-NEXT: add lr, sp, #96
+; LE-I32-NEON-NEXT: add r0, sp, #416
+; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #64
+; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d8, d8
+; LE-I32-NEON-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #32
+; LE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr q6, q5, q5
+; LE-I32-NEON-NEXT: vorr d0, d14, d14
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d9, d9
+; LE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d15, d15
+; LE-I32-NEON-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT: add lr, sp, #64
+; LE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT: add r0, sp, #400
+; LE-I32-NEON-NEXT: add lr, sp, #32
+; LE-I32-NEON-NEXT: vorr q6, q5, q5
+; LE-I32-NEON-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d10, d10
+; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #48
+; LE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d8, d8
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d11, d11
+; LE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d9, d9
+; LE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT: add lr, sp, #48
+; LE-I32-NEON-NEXT: add r0, sp, #384
+; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #32
+; LE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d10, d10
+; LE-I32-NEON-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #16
+; LE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d8, d8
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d11, d11
+; LE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d9, d9
+; LE-I32-NEON-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEON-NEXT: add lr, sp, #32
+; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr q7, q6, q6
+; LE-I32-NEON-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d10, d10
+; LE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT: add r0, sp, #368
+; LE-I32-NEON-NEXT: vld1.64 {d12, d13}, [r0]
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #144
+; LE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d11, d11
+; LE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d12, d12
+; LE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #144
+; LE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d17, d17
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEON-NEXT: add r0, sp, #240
+; LE-I32-NEON-NEXT: vorr d0, d13, d13
+; LE-I32-NEON-NEXT: add lr, sp, #144
+; LE-I32-NEON-NEXT: vld1.64 {d10, d11}, [r0]
+; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEON-NEXT: vstmia sp, {d10, d11} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d10, d10
+; LE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT: add lr, sp, #16
+; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #80
+; LE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d12, d12
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #112
+; LE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d14, d14
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d13, d13
+; LE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #128
+; LE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d12, d12
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d15, d15
+; LE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vorr d0, d13, d13
+; LE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vorr d0, d17, d17
+; LE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT: bl lrint
+; LE-I32-NEON-NEXT: add lr, sp, #64
+; LE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT: mov r0, r4
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: add lr, sp, #96
+; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: add lr, sp, #48
+; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT: vst1.32 {d8, d9}, [r0:128]!
+; LE-I32-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]
+; LE-I32-NEON-NEXT: add r0, r4, #64
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: add lr, sp, #144
+; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: add lr, sp, #16
+; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: add lr, sp, #32
+; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I32-NEON-NEXT: add sp, sp, #160
+; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: pop {r4, r5, r6, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v32f64:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: .pad #4
+; LE-I64-NEON-NEXT: sub sp, sp, #4
+; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: .pad #208
+; LE-I64-NEON-NEXT: sub sp, sp, #208
+; LE-I64-NEON-NEXT: add lr, sp, #72
+; LE-I64-NEON-NEXT: str r0, [sp, #156] @ 4-byte Spill
+; LE-I64-NEON-NEXT: add r0, sp, #456
+; LE-I64-NEON-NEXT: vorr q4, q0, q0
+; LE-I64-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #56
+; LE-I64-NEON-NEXT: vorr d0, d7, d7
+; LE-I64-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #104
+; LE-I64-NEON-NEXT: vorr q5, q2, q2
+; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT: add r0, sp, #344
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #192
+; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT: add r0, sp, #376
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #160
+; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT: add r0, sp, #360
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #136
+; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT: add r0, sp, #440
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #176
+; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d10, d10
+; LE-I64-NEON-NEXT: str r1, [sp, #120] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d11, d11
+; LE-I64-NEON-NEXT: mov r10, r1
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d8, d8
+; LE-I64-NEON-NEXT: add lr, sp, #88
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: mov r11, r1
+; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d9, d9
+; LE-I64-NEON-NEXT: mov r9, r1
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: add lr, sp, #40
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #56
+; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d10, d10
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d11, d11
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: add lr, sp, #72
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #56
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r7
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #176
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d17, d17
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r4
+; LE-I64-NEON-NEXT: add lr, sp, #72
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #104
+; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d9, d9
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: add lr, sp, #40
+; LE-I64-NEON-NEXT: vorr d0, d8, d8
+; LE-I64-NEON-NEXT: mov r8, r1
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r6
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: add lr, sp, #176
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r9
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: add lr, sp, #88
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #120] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #56
+; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #24
+; LE-I64-NEON-NEXT: vmov.32 d19[1], r0
+; LE-I64-NEON-NEXT: add r0, sp, #408
+; LE-I64-NEON-NEXT: ldr r2, [sp, #156] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r7
+; LE-I64-NEON-NEXT: mov r0, r2
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r1
+; LE-I64-NEON-NEXT: add r1, sp, #488
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #40
+; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r1]
+; LE-I64-NEON-NEXT: add r1, sp, #472
+; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #72
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #56
+; LE-I64-NEON-NEXT: vmov.32 d21[1], r11
+; LE-I64-NEON-NEXT: vmov.32 d20[1], r10
+; LE-I64-NEON-NEXT: add r10, r2, #192
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r1]
+; LE-I64-NEON-NEXT: add r1, sp, #392
+; LE-I64-NEON-NEXT: vmov.32 d18[1], r5
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #72
+; LE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]!
+; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r1]
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #104
+; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]
+; LE-I64-NEON-NEXT: add r0, sp, #312
+; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT: add r0, sp, #328
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r8
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #120
+; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT: add r0, sp, #424
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r4
+; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r10:128]!
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #176
+; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r10:128]!
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #192
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d17, d17
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: add lr, sp, #136
+; LE-I64-NEON-NEXT: mov r9, r1
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d10, d10
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d11, d11
+; LE-I64-NEON-NEXT: mov r8, r1
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: add lr, sp, #160
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d10, d10
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d11, d11
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: add lr, sp, #192
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #192
+; LE-I64-NEON-NEXT: mov r11, r1
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r4
+; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #176
+; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d9, d9
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d8, d8
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r6
+; LE-I64-NEON-NEXT: add lr, sp, #136
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #176
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r5
+; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #24
+; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d9, d9
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d8, d8
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r8
+; LE-I64-NEON-NEXT: add lr, sp, #88
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: add lr, sp, #192
+; LE-I64-NEON-NEXT: str r1, [sp, #24] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #40
+; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d11, d11
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r9
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d10, d10
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r11
+; LE-I64-NEON-NEXT: add lr, sp, #192
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #160
+; LE-I64-NEON-NEXT: str r1, [sp, #40] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #176
+; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #56
+; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d9, d9
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r4
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d8, d8
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r7
+; LE-I64-NEON-NEXT: add lr, sp, #176
+; LE-I64-NEON-NEXT: mov r8, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #8
+; LE-I64-NEON-NEXT: mov r11, r1
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r5
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #72
+; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d9, d9
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d8, d8
+; LE-I64-NEON-NEXT: add lr, sp, #56
+; LE-I64-NEON-NEXT: mov r9, r1
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r0
+; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #72
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #160
+; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #104
+; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d9, d9
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r6
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d8, d8
+; LE-I64-NEON-NEXT: add lr, sp, #160
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r0
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: add lr, sp, #8
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #120
+; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vorr d0, d9, d9
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r8
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: vorr d0, d8, d8
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r11
+; LE-I64-NEON-NEXT: bl lrint
+; LE-I64-NEON-NEXT: add lr, sp, #72
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #160
+; LE-I64-NEON-NEXT: vmov.32 d17[1], r9
+; LE-I64-NEON-NEXT: vmov.32 d16[1], r7
+; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r10:128]!
+; LE-I64-NEON-NEXT: vorr q9, q8, q8
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #136
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r5
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r1
+; LE-I64-NEON-NEXT: ldr r1, [sp, #156] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add r0, r1, #128
+; LE-I64-NEON-NEXT: add lr, sp, #56
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r6
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #176
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r4
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #192
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEON-NEXT: add r0, r1, #64
+; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #88
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEON-NEXT: add sp, sp, #208
+; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: add sp, sp, #4
+; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v32f64:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r6, lr}
+; BE-I32-NEXT: push {r4, r5, r6, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: .pad #176
+; BE-I32-NEXT: sub sp, sp, #176
+; BE-I32-NEXT: add lr, sp, #128
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: add r0, sp, #336
+; BE-I32-NEXT: vorr q6, q3, q3
+; BE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #80
+; BE-I32-NEXT: vorr q5, q1, q1
+; BE-I32-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #32
+; BE-I32-NEXT: vorr d0, d4, d4
+; BE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT: add r0, sp, #320
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #160
+; BE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT: add r0, sp, #432
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #112
+; BE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT: add r0, sp, #288
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #96
+; BE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT: add r0, sp, #368
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #48
+; BE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT: add r0, sp, #416
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #144
+; BE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT: add r0, sp, #400
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #64
+; BE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d12, d12
+; BE-I32-NEXT: vmov.32 d14[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d10, d10
+; BE-I32-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d13, d13
+; BE-I32-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d11, d11
+; BE-I32-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #128
+; BE-I32-NEXT: vorr q5, q4, q4
+; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d17, d17
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEXT: add lr, sp, #128
+; BE-I32-NEXT: add r0, sp, #384
+; BE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #16
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #80
+; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d8, d8
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #64
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d14, d14
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d9, d9
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d15, d15
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: add lr, sp, #80
+; BE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #16
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: add r0, sp, #272
+; BE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d10, d10
+; BE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #32
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d8, d8
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d11, d11
+; BE-I32-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d9, d9
+; BE-I32-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEXT: add lr, sp, #64
+; BE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload
+; BE-I32-NEXT: add lr, sp, #16
+; BE-I32-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEXT: add r0, sp, #256
+; BE-I32-NEXT: vorr d0, d10, d10
+; BE-I32-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #160
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d8, d8
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d11, d11
+; BE-I32-NEXT: vmov.32 d14[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d9, d9
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEXT: add lr, sp, #32
+; BE-I32-NEXT: add r0, sp, #304
+; BE-I32-NEXT: vld1.64 {d10, d11}, [r0]
+; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #16
+; BE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d14, d14
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #48
+; BE-I32-NEXT: vorr q4, q6, q6
+; BE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d12, d12
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d15, d15
+; BE-I32-NEXT: add lr, sp, #160
+; BE-I32-NEXT: vmov.32 d17[0], r0
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d10, d10
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: add lr, sp, #16
+; BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d13, d13
+; BE-I32-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #160
+; BE-I32-NEXT: vorr d0, d11, d11
+; BE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEXT: add lr, sp, #48
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: add r0, sp, #352
+; BE-I32-NEXT: vld1.64 {d14, d15}, [r0]
+; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d14, d14
+; BE-I32-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: add lr, sp, #160
+; BE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #96
+; BE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d12, d12
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #112
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d14, d14
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d13, d13
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #144
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d12, d12
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d15, d15
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: vorr d0, d13, d13
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #48
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT: vorr d0, d17, d17
+; BE-I32-NEXT: bl lrint
+; BE-I32-NEXT: add lr, sp, #160
+; BE-I32-NEXT: vrev64.32 q9, q4
+; BE-I32-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-I32-NEXT: add lr, sp, #80
+; BE-I32-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I32-NEXT: add lr, sp, #128
+; BE-I32-NEXT: vmov.32 d22[1], r0
+; BE-I32-NEXT: mov r0, r4
+; BE-I32-NEXT: vst1.32 {d20, d21}, [r0:128]!
+; BE-I32-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I32-NEXT: add lr, sp, #16
+; BE-I32-NEXT: vrev64.32 q8, q5
+; BE-I32-NEXT: vst1.32 {d20, d21}, [r0:128]!
+; BE-I32-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I32-NEXT: add lr, sp, #32
+; BE-I32-NEXT: vst1.32 {d20, d21}, [r0:128]!
+; BE-I32-NEXT: vst1.64 {d18, d19}, [r0:128]
+; BE-I32-NEXT: add r0, r4, #64
+; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT: add lr, sp, #64
+; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEXT: vst1.32 {d22, d23}, [r0:128]!
+; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEXT: vst1.64 {d16, d17}, [r0:128]
+; BE-I32-NEXT: add sp, sp, #176
+; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: pop {r4, r5, r6, pc}
+;
+; BE-I64-LABEL: lrint_v32f64:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: .pad #4
+; BE-I64-NEXT: sub sp, sp, #4
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: .pad #232
+; BE-I64-NEXT: sub sp, sp, #232
+; BE-I64-NEXT: add lr, sp, #184
+; BE-I64-NEXT: str r0, [sp, #148] @ 4-byte Spill
+; BE-I64-NEXT: add r0, sp, #416
+; BE-I64-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #168
+; BE-I64-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #152
+; BE-I64-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #128
+; BE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #200
+; BE-I64-NEXT: vld1.64 {d18, d19}, [r0]
+; BE-I64-NEXT: add r0, sp, #448
+; BE-I64-NEXT: vorr d0, d19, d19
+; BE-I64-NEXT: vld1.64 {d14, d15}, [r0]
+; BE-I64-NEXT: add r0, sp, #336
+; BE-I64-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #64
+; BE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT: add r0, sp, #400
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #8
+; BE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT: add r0, sp, #352
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #24
+; BE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT: add r0, sp, #368
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #48
+; BE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT: add r0, sp, #384
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #96
+; BE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT: add r0, sp, #512
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #112
+; BE-I64-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT: add r0, sp, #432
+; BE-I64-NEXT: vld1.64 {d8, d9}, [r0]
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d8, d8
+; BE-I64-NEXT: str r1, [sp, #80] @ 4-byte Spill
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d9, d9
+; BE-I64-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d14, d14
+; BE-I64-NEXT: add lr, sp, #216
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: mov r9, r1
+; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d15, d15
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: add lr, sp, #64
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT: vorr d0, d10, d10
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d11, d11
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: add lr, sp, #200
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: add lr, sp, #200
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d15[1], r7
+; BE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #8
+; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT: vorr d0, d11, d11
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d10, d10
+; BE-I64-NEXT: vmov.32 d14[1], r6
+; BE-I64-NEXT: add lr, sp, #64
+; BE-I64-NEXT: mov r10, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: add lr, sp, #24
+; BE-I64-NEXT: mov r11, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I64-NEXT: vorr d0, d15, d15
+; BE-I64-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d14, d14
+; BE-I64-NEXT: vmov.32 d8[1], r8
+; BE-I64-NEXT: add lr, sp, #8
+; BE-I64-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: add lr, sp, #24
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #216
+; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #48
+; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEXT: vorr d0, d9, d9
+; BE-I64-NEXT: vmov.32 d11[1], r9
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; BE-I64-NEXT: vorr d0, d8, d8
+; BE-I64-NEXT: add lr, sp, #216
+; BE-I64-NEXT: mov r9, r1
+; BE-I64-NEXT: vmov.32 d10[1], r0
+; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: add lr, sp, #48
+; BE-I64-NEXT: ldr r0, [sp, #80] @ 4-byte Reload
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #200
+; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #96
+; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEXT: vorr d0, d9, d9
+; BE-I64-NEXT: vmov.32 d11[1], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d8, d8
+; BE-I64-NEXT: vmov.32 d10[1], r5
+; BE-I64-NEXT: add lr, sp, #200
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: add lr, sp, #112
+; BE-I64-NEXT: vorr q4, q6, q6
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEXT: vorr d0, d13, d13
+; BE-I64-NEXT: vmov.32 d9[1], r10
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d12, d12
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: vmov.32 d8[1], r11
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: add lr, sp, #24
+; BE-I64-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #48
+; BE-I64-NEXT: vmov.32 d17[1], r0
+; BE-I64-NEXT: vmov.32 d16[1], r8
+; BE-I64-NEXT: vorr q9, q8, q8
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #112
+; BE-I64-NEXT: vmov.32 d17[1], r9
+; BE-I64-NEXT: vmov.32 d16[1], r6
+; BE-I64-NEXT: vorr q10, q8, q8
+; BE-I64-NEXT: vrev64.32 q8, q4
+; BE-I64-NEXT: vmov.32 d15[1], r7
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #200
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: vmov.32 d11[1], r5
+; BE-I64-NEXT: vrev64.32 q8, q8
+; BE-I64-NEXT: vmov.32 d14[1], r4
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #216
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: vmov.32 d10[1], r1
+; BE-I64-NEXT: vrev64.32 q8, q8
+; BE-I64-NEXT: vrev64.32 q6, q7
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #8
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #96
+; BE-I64-NEXT: vrev64.32 q7, q5
+; BE-I64-NEXT: vrev64.32 q8, q8
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #64
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #80
+; BE-I64-NEXT: vrev64.32 q8, q8
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #64
+; BE-I64-NEXT: vrev64.32 q8, q9
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #48
+; BE-I64-NEXT: vrev64.32 q8, q10
+; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT: add lr, sp, #128
+; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT: vorr d0, d11, d11
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d10, d10
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEXT: ldr r6, [sp, #148] @ 4-byte Reload
+; BE-I64-NEXT: add lr, sp, #152
+; BE-I64-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEXT: mov r5, r6
+; BE-I64-NEXT: vmov.32 d8[1], r1
+; BE-I64-NEXT: vrev64.32 q8, q4
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT: vorr d0, d11, d11
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d10, d10
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEXT: add lr, sp, #168
+; BE-I64-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEXT: vmov.32 d8[1], r1
+; BE-I64-NEXT: vrev64.32 q8, q4
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT: vorr d0, d11, d11
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d10, d10
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEXT: add lr, sp, #184
+; BE-I64-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEXT: vmov.32 d8[1], r1
+; BE-I64-NEXT: vrev64.32 q8, q4
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT: vorr d0, d11, d11
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d10, d10
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEXT: add r0, sp, #464
+; BE-I64-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEXT: vmov.32 d8[1], r1
+; BE-I64-NEXT: vrev64.32 q8, q4
+; BE-I64-NEXT: vld1.64 {d8, d9}, [r0]
+; BE-I64-NEXT: vorr d0, d9, d9
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d8, d8
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: add r0, sp, #480
+; BE-I64-NEXT: add r5, r6, #192
+; BE-I64-NEXT: vmov.32 d11[1], r4
+; BE-I64-NEXT: vmov.32 d10[1], r1
+; BE-I64-NEXT: vld1.64 {d8, d9}, [r0]
+; BE-I64-NEXT: vorr d0, d9, d9
+; BE-I64-NEXT: vrev64.32 q8, q5
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d8, d8
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: add r0, sp, #496
+; BE-I64-NEXT: vmov.32 d11[1], r4
+; BE-I64-NEXT: vmov.32 d10[1], r1
+; BE-I64-NEXT: vld1.64 {d8, d9}, [r0]
+; BE-I64-NEXT: vorr d0, d9, d9
+; BE-I64-NEXT: vrev64.32 q8, q5
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vorr d0, d8, d8
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: bl lrint
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: add lr, sp, #112
+; BE-I64-NEXT: add r0, r6, #128
+; BE-I64-NEXT: vmov.32 d11[1], r4
+; BE-I64-NEXT: vmov.32 d10[1], r1
+; BE-I64-NEXT: vrev64.32 q8, q5
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT: vst1.64 {d14, d15}, [r5:128]
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #200
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #216
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #96
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #80
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]
+; BE-I64-NEXT: add r0, r6, #64
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #64
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: add lr, sp, #48
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]
+; BE-I64-NEXT: add sp, sp, #232
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: add sp, sp, #4
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v32f64:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r4, r5, r6, lr}
+; BE-I32-NEON-NEXT: push {r4, r5, r6, lr}
+; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: .pad #176
+; BE-I32-NEON-NEXT: sub sp, sp, #176
+; BE-I32-NEON-NEXT: add lr, sp, #128
+; BE-I32-NEON-NEXT: mov r4, r0
+; BE-I32-NEON-NEXT: add r0, sp, #336
+; BE-I32-NEON-NEXT: vorr q6, q3, q3
+; BE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #80
+; BE-I32-NEON-NEXT: vorr q5, q1, q1
+; BE-I32-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #32
+; BE-I32-NEON-NEXT: vorr d0, d4, d4
+; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT: add r0, sp, #320
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #160
+; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT: add r0, sp, #432
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #112
+; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT: add r0, sp, #288
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #96
+; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT: add r0, sp, #368
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #48
+; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT: add r0, sp, #416
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #144
+; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT: add r0, sp, #400
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #64
+; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d12, d12
+; BE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d10, d10
+; BE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d13, d13
+; BE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d11, d11
+; BE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #128
+; BE-I32-NEON-NEXT: vorr q5, q4, q4
+; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d17, d17
+; BE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEON-NEXT: add lr, sp, #128
+; BE-I32-NEON-NEXT: add r0, sp, #384
+; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #16
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #80
+; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d8, d8
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #64
+; BE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d14, d14
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d9, d9
+; BE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d15, d15
+; BE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT: add lr, sp, #80
+; BE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #16
+; BE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT: add r0, sp, #272
+; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d10, d10
+; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #32
+; BE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d8, d8
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d11, d11
+; BE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d9, d9
+; BE-I32-NEON-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEON-NEXT: add lr, sp, #64
+; BE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #16
+; BE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEON-NEXT: add r0, sp, #256
+; BE-I32-NEON-NEXT: vorr d0, d10, d10
+; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #160
+; BE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d8, d8
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d11, d11
+; BE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d9, d9
+; BE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEON-NEXT: add lr, sp, #32
+; BE-I32-NEON-NEXT: add r0, sp, #304
+; BE-I32-NEON-NEXT: vld1.64 {d10, d11}, [r0]
+; BE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #16
+; BE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d14, d14
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #48
+; BE-I32-NEON-NEXT: vorr q4, q6, q6
+; BE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d12, d12
+; BE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d15, d15
+; BE-I32-NEON-NEXT: add lr, sp, #160
+; BE-I32-NEON-NEXT: vmov.32 d17[0], r0
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d10, d10
+; BE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT: add lr, sp, #16
+; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d13, d13
+; BE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #160
+; BE-I32-NEON-NEXT: vorr d0, d11, d11
+; BE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #48
+; BE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT: add r0, sp, #352
+; BE-I32-NEON-NEXT: vld1.64 {d14, d15}, [r0]
+; BE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d14, d14
+; BE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT: add lr, sp, #160
+; BE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #96
+; BE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d12, d12
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #112
+; BE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d14, d14
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d13, d13
+; BE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #144
+; BE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d12, d12
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d15, d15
+; BE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: vorr d0, d13, d13
+; BE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #48
+; BE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vorr d0, d17, d17
+; BE-I32-NEON-NEXT: bl lrint
+; BE-I32-NEON-NEXT: add lr, sp, #160
+; BE-I32-NEON-NEXT: vrev64.32 q9, q4
+; BE-I32-NEON-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #80
+; BE-I32-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #128
+; BE-I32-NEON-NEXT: vmov.32 d22[1], r0
+; BE-I32-NEON-NEXT: mov r0, r4
+; BE-I32-NEON-NEXT: vst1.32 {d20, d21}, [r0:128]!
+; BE-I32-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #16
+; BE-I32-NEON-NEXT: vrev64.32 q8, q5
+; BE-I32-NEON-NEXT: vst1.32 {d20, d21}, [r0:128]!
+; BE-I32-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #32
+; BE-I32-NEON-NEXT: vst1.32 {d20, d21}, [r0:128]!
+; BE-I32-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]
+; BE-I32-NEON-NEXT: add r0, r4, #64
+; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #64
+; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEON-NEXT: vst1.32 {d22, d23}, [r0:128]!
+; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; BE-I32-NEON-NEXT: add sp, sp, #176
+; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: pop {r4, r5, r6, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v32f64:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: .pad #4
+; BE-I64-NEON-NEXT: sub sp, sp, #4
+; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: .pad #232
+; BE-I64-NEON-NEXT: sub sp, sp, #232
+; BE-I64-NEON-NEXT: add lr, sp, #184
+; BE-I64-NEON-NEXT: str r0, [sp, #148] @ 4-byte Spill
+; BE-I64-NEON-NEXT: add r0, sp, #416
+; BE-I64-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #168
+; BE-I64-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #152
+; BE-I64-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #128
+; BE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #200
+; BE-I64-NEON-NEXT: vld1.64 {d18, d19}, [r0]
+; BE-I64-NEON-NEXT: add r0, sp, #448
+; BE-I64-NEON-NEXT: vorr d0, d19, d19
+; BE-I64-NEON-NEXT: vld1.64 {d14, d15}, [r0]
+; BE-I64-NEON-NEXT: add r0, sp, #336
+; BE-I64-NEON-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #64
+; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT: add r0, sp, #400
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #8
+; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT: add r0, sp, #352
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #24
+; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT: add r0, sp, #368
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #48
+; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT: add r0, sp, #384
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #96
+; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT: add r0, sp, #512
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #112
+; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT: add r0, sp, #432
+; BE-I64-NEON-NEXT: vld1.64 {d8, d9}, [r0]
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d8, d8
+; BE-I64-NEON-NEXT: str r1, [sp, #80] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d9, d9
+; BE-I64-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d14, d14
+; BE-I64-NEON-NEXT: add lr, sp, #216
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: mov r9, r1
+; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d15, d15
+; BE-I64-NEON-NEXT: mov r8, r1
+; BE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: add lr, sp, #64
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vorr d0, d10, d10
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d11, d11
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: add lr, sp, #200
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: add lr, sp, #200
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r7
+; BE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #8
+; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vorr d0, d11, d11
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d10, d10
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r6
+; BE-I64-NEON-NEXT: add lr, sp, #64
+; BE-I64-NEON-NEXT: mov r10, r1
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: add lr, sp, #24
+; BE-I64-NEON-NEXT: mov r11, r1
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vorr d0, d15, d15
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d14, d14
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r8
+; BE-I64-NEON-NEXT: add lr, sp, #8
+; BE-I64-NEON-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: add lr, sp, #24
+; BE-I64-NEON-NEXT: mov r8, r1
+; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #216
+; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #48
+; BE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vorr d0, d9, d9
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r9
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vorr d0, d8, d8
+; BE-I64-NEON-NEXT: add lr, sp, #216
+; BE-I64-NEON-NEXT: mov r9, r1
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: add lr, sp, #48
+; BE-I64-NEON-NEXT: ldr r0, [sp, #80] @ 4-byte Reload
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #200
+; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #96
+; BE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vorr d0, d9, d9
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d8, d8
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r5
+; BE-I64-NEON-NEXT: add lr, sp, #200
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: add lr, sp, #112
+; BE-I64-NEON-NEXT: vorr q4, q6, q6
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vorr d0, d13, d13
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r10
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d12, d12
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r11
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: add lr, sp, #24
+; BE-I64-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #48
+; BE-I64-NEON-NEXT: vmov.32 d17[1], r0
+; BE-I64-NEON-NEXT: vmov.32 d16[1], r8
+; BE-I64-NEON-NEXT: vorr q9, q8, q8
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #112
+; BE-I64-NEON-NEXT: vmov.32 d17[1], r9
+; BE-I64-NEON-NEXT: vmov.32 d16[1], r6
+; BE-I64-NEON-NEXT: vorr q10, q8, q8
+; BE-I64-NEON-NEXT: vrev64.32 q8, q4
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r7
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #200
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r5
+; BE-I64-NEON-NEXT: vrev64.32 q8, q8
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r4
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #216
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 q8, q8
+; BE-I64-NEON-NEXT: vrev64.32 q6, q7
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #8
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #96
+; BE-I64-NEON-NEXT: vrev64.32 q7, q5
+; BE-I64-NEON-NEXT: vrev64.32 q8, q8
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #64
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #80
+; BE-I64-NEON-NEXT: vrev64.32 q8, q8
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #64
+; BE-I64-NEON-NEXT: vrev64.32 q8, q9
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #48
+; BE-I64-NEON-NEXT: vrev64.32 q8, q10
+; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT: add lr, sp, #128
+; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vorr d0, d11, d11
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d10, d10
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT: ldr r6, [sp, #148] @ 4-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #152
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEON-NEXT: mov r5, r6
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 q8, q4
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vorr d0, d11, d11
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d10, d10
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT: add lr, sp, #168
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 q8, q4
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vorr d0, d11, d11
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d10, d10
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT: add lr, sp, #184
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 q8, q4
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vorr d0, d11, d11
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d10, d10
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT: add r0, sp, #464
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 q8, q4
+; BE-I64-NEON-NEXT: vld1.64 {d8, d9}, [r0]
+; BE-I64-NEON-NEXT: vorr d0, d9, d9
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d8, d8
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: add r0, sp, #480
+; BE-I64-NEON-NEXT: add r5, r6, #192
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r1
+; BE-I64-NEON-NEXT: vld1.64 {d8, d9}, [r0]
+; BE-I64-NEON-NEXT: vorr d0, d9, d9
+; BE-I64-NEON-NEXT: vrev64.32 q8, q5
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d8, d8
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: add r0, sp, #496
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r1
+; BE-I64-NEON-NEXT: vld1.64 {d8, d9}, [r0]
+; BE-I64-NEON-NEXT: vorr d0, d9, d9
+; BE-I64-NEON-NEXT: vrev64.32 q8, q5
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vorr d0, d8, d8
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: bl lrint
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: add lr, sp, #112
+; BE-I64-NEON-NEXT: add r0, r6, #128
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 q8, q5
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r5:128]
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #200
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #216
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #96
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #80
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; BE-I64-NEON-NEXT: add r0, r6, #64
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #64
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: add lr, sp, #48
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]
+; BE-I64-NEON-NEXT: add sp, sp, #232
+; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: add sp, sp, #4
+; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v16f64(<32 x double> %x)
+ ret <32 x iXLen> %a
+}
+declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>)
+
+; Scalarized lowering of llvm.lrint on <1 x fp128>: the single element becomes
+; one libcall to lrintl(). The I32 configurations return the result directly in
+; r0; the I64 configurations rebuild the 64-bit lane from the r1:r0 pair, and
+; the big-endian I64 variants additionally fix the lane order with vrev64.32.
+define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
+; LE-I32-LABEL: lrint_v1fp128:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r11, lr}
+; LE-I32-NEXT:    push {r11, lr}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v1fp128:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r11, lr}
+; LE-I64-NEXT:    push {r11, lr}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    vmov.32 d0[0], r0
+; LE-I64-NEXT:    vmov.32 d0[1], r1
+; LE-I64-NEXT:    pop {r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v1fp128:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r11, lr}
+; LE-I32-NEON-NEXT:    push {r11, lr}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v1fp128:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r11, lr}
+; LE-I64-NEON-NEXT:    push {r11, lr}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    vmov.32 d0[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d0[1], r1
+; LE-I64-NEON-NEXT:    pop {r11, pc}
+;
+; BE-I32-LABEL: lrint_v1fp128:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r11, lr}
+; BE-I32-NEXT:    push {r11, lr}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v1fp128:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r11, lr}
+; BE-I64-NEXT:    push {r11, lr}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 d0, d16
+; BE-I64-NEXT:    pop {r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v1fp128:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r11, lr}
+; BE-I32-NEON-NEXT:    push {r11, lr}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v1fp128:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r11, lr}
+; BE-I64-NEON-NEXT:    push {r11, lr}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 d0, d16
+; BE-I64-NEON-NEXT:    pop {r11, pc}
+  %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128> %x)
+  ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>)
+
+; <2 x fp128>: two lrintl() libcalls. The first fp128 argument arrives in
+; r0-r3 and is staged into callee-saved GPRs (r5-r8) while the second element
+; is ldm-loaded from the stack and converted first; the I64 configurations
+; keep each 64-bit result lane in a d register (r1:r0 pair per call), and the
+; big-endian I64 variants vrev64.32 each lane into memory order.
+define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
+; LE-I32-LABEL: lrint_v2fp128:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; LE-I32-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; LE-I32-NEXT:    mov r8, r3
+; LE-I32-NEXT:    add r3, sp, #24
+; LE-I32-NEXT:    mov r5, r2
+; LE-I32-NEXT:    mov r6, r1
+; LE-I32-NEXT:    mov r7, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    mov r0, r7
+; LE-I32-NEXT:    mov r1, r6
+; LE-I32-NEXT:    mov r2, r5
+; LE-I32-NEXT:    mov r3, r8
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d0[0], r0
+; LE-I32-NEXT:    vmov.32 d0[1], r4
+; LE-I32-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+;
+; LE-I64-LABEL: lrint_v2fp128:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; LE-I64-NEXT:    .vsave {d8, d9}
+; LE-I64-NEXT:    vpush {d8, d9}
+; LE-I64-NEXT:    mov r8, r3
+; LE-I64-NEXT:    add r3, sp, #40
+; LE-I64-NEXT:    mov r5, r2
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    mov r7, r0
+; LE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    mov r0, r7
+; LE-I64-NEXT:    mov r1, r6
+; LE-I64-NEXT:    mov r2, r5
+; LE-I64-NEXT:    mov r3, r8
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    vmov.32 d9[1], r4
+; LE-I64-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEXT:    vorr q0, q4, q4
+; LE-I64-NEXT:    vpop {d8, d9}
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v2fp128:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; LE-I32-NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; LE-I32-NEON-NEXT:    mov r8, r3
+; LE-I32-NEON-NEXT:    add r3, sp, #24
+; LE-I32-NEON-NEXT:    mov r5, r2
+; LE-I32-NEON-NEXT:    mov r6, r1
+; LE-I32-NEON-NEXT:    mov r7, r0
+; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    mov r4, r0
+; LE-I32-NEON-NEXT:    mov r0, r7
+; LE-I32-NEON-NEXT:    mov r1, r6
+; LE-I32-NEON-NEXT:    mov r2, r5
+; LE-I32-NEON-NEXT:    mov r3, r8
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    vmov.32 d0[0], r0
+; LE-I32-NEON-NEXT:    vmov.32 d0[1], r4
+; LE-I32-NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v2fp128:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; LE-I64-NEON-NEXT:    .vsave {d8, d9}
+; LE-I64-NEON-NEXT:    vpush {d8, d9}
+; LE-I64-NEON-NEXT:    mov r8, r3
+; LE-I64-NEON-NEXT:    add r3, sp, #40
+; LE-I64-NEON-NEXT:    mov r5, r2
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    mov r7, r0
+; LE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r7
+; LE-I64-NEON-NEXT:    mov r1, r6
+; LE-I64-NEON-NEXT:    mov r2, r5
+; LE-I64-NEON-NEXT:    mov r3, r8
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r4
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEON-NEXT:    vorr q0, q4, q4
+; LE-I64-NEON-NEXT:    vpop {d8, d9}
+; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+;
+; BE-I32-LABEL: lrint_v2fp128:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; BE-I32-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; BE-I32-NEXT:    mov r8, r3
+; BE-I32-NEXT:    add r3, sp, #24
+; BE-I32-NEXT:    mov r5, r2
+; BE-I32-NEXT:    mov r6, r1
+; BE-I32-NEXT:    mov r7, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    mov r4, r0
+; BE-I32-NEXT:    mov r0, r7
+; BE-I32-NEXT:    mov r1, r6
+; BE-I32-NEXT:    mov r2, r5
+; BE-I32-NEXT:    mov r3, r8
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    vmov.32 d16[0], r0
+; BE-I32-NEXT:    vmov.32 d16[1], r4
+; BE-I32-NEXT:    vrev64.32 d0, d16
+; BE-I32-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+;
+; BE-I64-LABEL: lrint_v2fp128:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; BE-I64-NEXT:    .vsave {d8}
+; BE-I64-NEXT:    vpush {d8}
+; BE-I64-NEXT:    mov r8, r3
+; BE-I64-NEXT:    add r3, sp, #32
+; BE-I64-NEXT:    mov r5, r2
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    mov r7, r0
+; BE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    mov r0, r7
+; BE-I64-NEXT:    mov r1, r6
+; BE-I64-NEXT:    mov r2, r5
+; BE-I64-NEXT:    mov r3, r8
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov.32 d8[1], r4
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 d1, d8
+; BE-I64-NEXT:    vrev64.32 d0, d16
+; BE-I64-NEXT:    vpop {d8}
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v2fp128:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; BE-I32-NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; BE-I32-NEON-NEXT:    mov r8, r3
+; BE-I32-NEON-NEXT:    add r3, sp, #24
+; BE-I32-NEON-NEXT:    mov r5, r2
+; BE-I32-NEON-NEXT:    mov r6, r1
+; BE-I32-NEON-NEXT:    mov r7, r0
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    mov r4, r0
+; BE-I32-NEON-NEXT:    mov r0, r7
+; BE-I32-NEON-NEXT:    mov r1, r6
+; BE-I32-NEON-NEXT:    mov r2, r5
+; BE-I32-NEON-NEXT:    mov r3, r8
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-I32-NEON-NEXT:    vmov.32 d16[1], r4
+; BE-I32-NEON-NEXT:    vrev64.32 d0, d16
+; BE-I32-NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v2fp128:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; BE-I64-NEON-NEXT:    .vsave {d8}
+; BE-I64-NEON-NEXT:    vpush {d8}
+; BE-I64-NEON-NEXT:    mov r8, r3
+; BE-I64-NEON-NEXT:    add r3, sp, #32
+; BE-I64-NEON-NEXT:    mov r5, r2
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    mov r7, r0
+; BE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r7
+; BE-I64-NEON-NEXT:    mov r1, r6
+; BE-I64-NEON-NEXT:    mov r2, r5
+; BE-I64-NEON-NEXT:    mov r3, r8
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 d1, d8
+; BE-I64-NEON-NEXT:    vrev64.32 d0, d16
+; BE-I64-NEON-NEXT:    vpop {d8}
+; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+  %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128> %x)
+  ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>)
+
+; <4 x fp128>: four lrintl() libcalls. Elements beyond the first are passed on
+; the stack; each is materialized into r0-r3 with ldr/ldm before its call. I32
+; configurations pack the four 32-bit results into q0, I64 configurations
+; build two q registers from the r1:r0 pairs, and the big-endian variants
+; vrev64.32 the lanes into memory order.
+define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
+; LE-I32-LABEL: lrint_v4fp128:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r4, lr}
+; LE-I32-NEXT:    push {r4, lr}
+; LE-I32-NEXT:    .vsave {d8, d9}
+; LE-I32-NEXT:    vpush {d8, d9}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #60
+; LE-I32-NEXT:    ldr r12, [sp, #56]
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEXT:    mov r0, r12
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #40
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #28
+; LE-I32-NEXT:    ldr r12, [sp, #24]
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEXT:    mov r0, r12
+; LE-I32-NEXT:    vmov.32 d9[1], r4
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    vorr q0, q4, q4
+; LE-I32-NEXT:    vpop {d8, d9}
+; LE-I32-NEXT:    pop {r4, pc}
+;
+; LE-I64-LABEL: lrint_v4fp128:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11}
+; LE-I64-NEXT:    mov r5, r3
+; LE-I64-NEXT:    add r3, sp, #96
+; LE-I64-NEXT:    mov r7, r2
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    mov r4, r0
+; LE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    mov r0, r4
+; LE-I64-NEXT:    mov r1, r6
+; LE-I64-NEXT:    mov r2, r7
+; LE-I64-NEXT:    mov r3, r5
+; LE-I64-NEXT:    ldr r8, [sp, #80]
+; LE-I64-NEXT:    ldr r10, [sp, #64]
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #68
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    mov r0, r10
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #84
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    mov r0, r8
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    vmov.32 d11[1], r4
+; LE-I64-NEXT:    vmov.32 d9[1], r9
+; LE-I64-NEXT:    vmov.32 d10[1], r5
+; LE-I64-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEXT:    vorr q0, q5, q5
+; LE-I64-NEXT:    vorr q1, q4, q4
+; LE-I64-NEXT:    vpop {d8, d9, d10, d11}
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v4fp128:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r4, lr}
+; LE-I32-NEON-NEXT:    push {r4, lr}
+; LE-I32-NEON-NEXT:    .vsave {d8, d9}
+; LE-I32-NEON-NEXT:    vpush {d8, d9}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #60
+; LE-I32-NEON-NEXT:    ldr r12, [sp, #56]
+; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT:    mov r0, r12
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #40
+; LE-I32-NEON-NEXT:    mov r4, r0
+; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #28
+; LE-I32-NEON-NEXT:    ldr r12, [sp, #24]
+; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT:    mov r0, r12
+; LE-I32-NEON-NEXT:    vmov.32 d9[1], r4
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT:    vorr q0, q4, q4
+; LE-I32-NEON-NEXT:    vpop {d8, d9}
+; LE-I32-NEON-NEXT:    pop {r4, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v4fp128:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11}
+; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11}
+; LE-I64-NEON-NEXT:    mov r5, r3
+; LE-I64-NEON-NEXT:    add r3, sp, #96
+; LE-I64-NEON-NEXT:    mov r7, r2
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    mov r4, r0
+; LE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    mov r9, r1
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r4
+; LE-I64-NEON-NEXT:    mov r1, r6
+; LE-I64-NEON-NEXT:    mov r2, r7
+; LE-I64-NEON-NEXT:    mov r3, r5
+; LE-I64-NEON-NEXT:    ldr r8, [sp, #80]
+; LE-I64-NEON-NEXT:    ldr r10, [sp, #64]
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #68
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r10
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #84
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r8
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r4
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r9
+; LE-I64-NEON-NEXT:    vmov.32 d10[1], r5
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEON-NEXT:    vorr q0, q5, q5
+; LE-I64-NEON-NEXT:    vorr q1, q4, q4
+; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11}
+; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I32-LABEL: lrint_v4fp128:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r4, lr}
+; BE-I32-NEXT:    push {r4, lr}
+; BE-I32-NEXT:    .vsave {d8, d9}
+; BE-I32-NEXT:    vpush {d8, d9}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #60
+; BE-I32-NEXT:    ldr r12, [sp, #56]
+; BE-I32-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEXT:    mov r0, r12
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #40
+; BE-I32-NEXT:    mov r4, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #28
+; BE-I32-NEXT:    ldr r12, [sp, #24]
+; BE-I32-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEXT:    mov r0, r12
+; BE-I32-NEXT:    vmov.32 d9[1], r4
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEXT:    vrev64.32 q0, q4
+; BE-I32-NEXT:    vpop {d8, d9}
+; BE-I32-NEXT:    pop {r4, pc}
+;
+; BE-I64-LABEL: lrint_v4fp128:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEXT:    .vsave {d8, d9, d10}
+; BE-I64-NEXT:    vpush {d8, d9, d10}
+; BE-I64-NEXT:    mov r5, r3
+; BE-I64-NEXT:    add r3, sp, #88
+; BE-I64-NEXT:    mov r7, r2
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    mov r4, r0
+; BE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    mov r9, r1
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    mov r0, r4
+; BE-I64-NEXT:    mov r1, r6
+; BE-I64-NEXT:    mov r2, r7
+; BE-I64-NEXT:    mov r3, r5
+; BE-I64-NEXT:    ldr r8, [sp, #72]
+; BE-I64-NEXT:    ldr r10, [sp, #56]
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #60
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    mov r0, r10
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #76
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    mov r0, r8
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov.32 d10[1], r4
+; BE-I64-NEXT:    vmov.32 d8[1], r9
+; BE-I64-NEXT:    vmov.32 d9[1], r5
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 d1, d10
+; BE-I64-NEXT:    vrev64.32 d3, d8
+; BE-I64-NEXT:    vrev64.32 d0, d9
+; BE-I64-NEXT:    vrev64.32 d2, d16
+; BE-I64-NEXT:    vpop {d8, d9, d10}
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v4fp128:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r4, lr}
+; BE-I32-NEON-NEXT:    push {r4, lr}
+; BE-I32-NEON-NEXT:    .vsave {d8, d9}
+; BE-I32-NEON-NEXT:    vpush {d8, d9}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #60
+; BE-I32-NEON-NEXT:    ldr r12, [sp, #56]
+; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT:    mov r0, r12
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #40
+; BE-I32-NEON-NEXT:    mov r4, r0
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #28
+; BE-I32-NEON-NEXT:    ldr r12, [sp, #24]
+; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT:    mov r0, r12
+; BE-I32-NEON-NEXT:    vmov.32 d9[1], r4
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT:    vrev64.32 q0, q4
+; BE-I32-NEON-NEXT:    vpop {d8, d9}
+; BE-I32-NEON-NEXT:    pop {r4, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v4fp128:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10}
+; BE-I64-NEON-NEXT:    vpush {d8, d9, d10}
+; BE-I64-NEON-NEXT:    mov r5, r3
+; BE-I64-NEON-NEXT:    add r3, sp, #88
+; BE-I64-NEON-NEXT:    mov r7, r2
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    mov r4, r0
+; BE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    mov r9, r1
+; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r4
+; BE-I64-NEON-NEXT:    mov r1, r6
+; BE-I64-NEON-NEXT:    mov r2, r7
+; BE-I64-NEON-NEXT:    mov r3, r5
+; BE-I64-NEON-NEXT:    ldr r8, [sp, #72]
+; BE-I64-NEON-NEXT:    ldr r10, [sp, #56]
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #60
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r10
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #76
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r8
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r9
+; BE-I64-NEON-NEXT:    vmov.32 d9[1], r5
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 d1, d10
+; BE-I64-NEON-NEXT:    vrev64.32 d3, d8
+; BE-I64-NEON-NEXT:    vrev64.32 d0, d9
+; BE-I64-NEON-NEXT:    vrev64.32 d2, d16
+; BE-I64-NEON-NEXT:    vpop {d8, d9, d10}
+; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+  %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> %x)
+  ret <4 x iXLen> %a
+}
+declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>)
+
+define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
+; LE-I32-LABEL: lrint_v8fp128:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11}
+; LE-I32-NEXT: mov r6, r3
+; LE-I32-NEXT: add r3, sp, #112
+; LE-I32-NEXT: mov r7, r2
+; LE-I32-NEXT: mov r4, r1
+; LE-I32-NEXT: mov r5, r0
+; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: mov r0, r5
+; LE-I32-NEXT: mov r1, r4
+; LE-I32-NEXT: mov r2, r7
+; LE-I32-NEXT: mov r3, r6
+; LE-I32-NEXT: ldr r8, [sp, #160]
+; LE-I32-NEXT: ldr r9, [sp, #64]
+; LE-I32-NEXT: ldr r10, [sp, #80]
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #84
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: mov r0, r10
+; LE-I32-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: ldr r6, [sp, #96]
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: ldr r1, [sp, #100]
+; LE-I32-NEXT: ldr r2, [sp, #104]
+; LE-I32-NEXT: ldr r3, [sp, #108]
+; LE-I32-NEXT: mov r0, r6
+; LE-I32-NEXT: ldr r4, [sp, #68]
+; LE-I32-NEXT: ldr r5, [sp, #72]
+; LE-I32-NEXT: ldr r10, [sp, #164]
+; LE-I32-NEXT: ldr r7, [sp, #168]
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: ldr r3, [sp, #76]
+; LE-I32-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEXT: mov r0, r9
+; LE-I32-NEXT: mov r1, r4
+; LE-I32-NEXT: mov r2, r5
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: ldr r3, [sp, #172]
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: mov r0, r8
+; LE-I32-NEXT: mov r1, r10
+; LE-I32-NEXT: mov r2, r7
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #144
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #132
+; LE-I32-NEXT: ldr r7, [sp, #128]
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEXT: mov r0, r7
+; LE-I32-NEXT: vmov.32 d9[1], r4
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: vorr q0, q5, q5
+; LE-I32-NEXT: vorr q1, q4, q4
+; LE-I32-NEXT: vpop {d8, d9, d10, d11}
+; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-I64-LABEL: lrint_v8fp128:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: .pad #4
+; LE-I64-NEXT: sub sp, sp, #4
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: .pad #8
+; LE-I64-NEXT: sub sp, sp, #8
+; LE-I64-NEXT: mov r11, r3
+; LE-I64-NEXT: add r3, sp, #208
+; LE-I64-NEXT: mov r10, r2
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r7, sp, #164
+; LE-I64-NEXT: ldr r6, [sp, #160]
+; LE-I64-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: ldm r7, {r1, r2, r3, r7}
+; LE-I64-NEXT: mov r0, r6
+; LE-I64-NEXT: ldr r8, [sp, #128]
+; LE-I64-NEXT: ldr r9, [sp, #144]
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #180
+; LE-I64-NEXT: str r1, [sp] @ 4-byte Spill
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: mov r0, r7
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #132
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: mov r0, r8
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #148
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: mov r0, r9
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: mov r0, r5
+; LE-I64-NEXT: mov r1, r4
+; LE-I64-NEXT: mov r2, r10
+; LE-I64-NEXT: mov r3, r11
+; LE-I64-NEXT: ldr r6, [sp, #112]
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #116
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: mov r0, r6
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #196
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #192]
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: ldr r0, [sp] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d11[1], r7
+; LE-I64-NEXT: vmov.32 d10[1], r0
+; LE-I64-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d15[1], r5
+; LE-I64-NEXT: vorr q2, q5, q5
+; LE-I64-NEXT: vmov.32 d13[1], r9
+; LE-I64-NEXT: vmov.32 d9[1], r0
+; LE-I64-NEXT: vmov.32 d14[1], r4
+; LE-I64-NEXT: vmov.32 d12[1], r8
+; LE-I64-NEXT: vorr q0, q7, q7
+; LE-I64-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEXT: vorr q1, q6, q6
+; LE-I64-NEXT: vorr q3, q4, q4
+; LE-I64-NEXT: add sp, sp, #8
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: add sp, sp, #4
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v8fp128:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11}
+; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11}
+; LE-I32-NEON-NEXT: mov r6, r3
+; LE-I32-NEON-NEXT: add r3, sp, #112
+; LE-I32-NEON-NEXT: mov r7, r2
+; LE-I32-NEON-NEXT: mov r4, r1
+; LE-I32-NEON-NEXT: mov r5, r0
+; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT: mov r0, r5
+; LE-I32-NEON-NEXT: mov r1, r4
+; LE-I32-NEON-NEXT: mov r2, r7
+; LE-I32-NEON-NEXT: mov r3, r6
+; LE-I32-NEON-NEXT: ldr r8, [sp, #160]
+; LE-I32-NEON-NEXT: ldr r9, [sp, #64]
+; LE-I32-NEON-NEXT: ldr r10, [sp, #80]
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #84
+; LE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT: mov r0, r10
+; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: ldr r6, [sp, #96]
+; LE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT: ldr r1, [sp, #100]
+; LE-I32-NEON-NEXT: ldr r2, [sp, #104]
+; LE-I32-NEON-NEXT: ldr r3, [sp, #108]
+; LE-I32-NEON-NEXT: mov r0, r6
+; LE-I32-NEON-NEXT: ldr r4, [sp, #68]
+; LE-I32-NEON-NEXT: ldr r5, [sp, #72]
+; LE-I32-NEON-NEXT: ldr r10, [sp, #164]
+; LE-I32-NEON-NEXT: ldr r7, [sp, #168]
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: ldr r3, [sp, #76]
+; LE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT: mov r0, r9
+; LE-I32-NEON-NEXT: mov r1, r4
+; LE-I32-NEON-NEXT: mov r2, r5
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: ldr r3, [sp, #172]
+; LE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT: mov r0, r8
+; LE-I32-NEON-NEXT: mov r1, r10
+; LE-I32-NEON-NEXT: mov r2, r7
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #144
+; LE-I32-NEON-NEXT: mov r4, r0
+; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #132
+; LE-I32-NEON-NEXT: ldr r7, [sp, #128]
+; LE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT: mov r0, r7
+; LE-I32-NEON-NEXT: vmov.32 d9[1], r4
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT: vorr q0, q5, q5
+; LE-I32-NEON-NEXT: vorr q1, q4, q4
+; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11}
+; LE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v8fp128:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: .pad #4
+; LE-I64-NEON-NEXT: sub sp, sp, #4
+; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: .pad #8
+; LE-I64-NEON-NEXT: sub sp, sp, #8
+; LE-I64-NEON-NEXT: mov r11, r3
+; LE-I64-NEON-NEXT: add r3, sp, #208
+; LE-I64-NEON-NEXT: mov r10, r2
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: mov r5, r0
+; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r7, sp, #164
+; LE-I64-NEON-NEXT: ldr r6, [sp, #160]
+; LE-I64-NEON-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: ldm r7, {r1, r2, r3, r7}
+; LE-I64-NEON-NEXT: mov r0, r6
+; LE-I64-NEON-NEXT: ldr r8, [sp, #128]
+; LE-I64-NEON-NEXT: ldr r9, [sp, #144]
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #180
+; LE-I64-NEON-NEXT: str r1, [sp] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: mov r0, r7
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #132
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: mov r0, r8
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #148
+; LE-I64-NEON-NEXT: mov r8, r1
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: mov r0, r9
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: mov r9, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: mov r0, r5
+; LE-I64-NEON-NEXT: mov r1, r4
+; LE-I64-NEON-NEXT: mov r2, r10
+; LE-I64-NEON-NEXT: mov r3, r11
+; LE-I64-NEON-NEXT: ldr r6, [sp, #112]
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #116
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: mov r0, r6
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #196
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #192]
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r7
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r5
+; LE-I64-NEON-NEXT: vorr q2, q5, q5
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r9
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r0
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r4
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r8
+; LE-I64-NEON-NEXT: vorr q0, q7, q7
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEON-NEXT: vorr q1, q6, q6
+; LE-I64-NEON-NEXT: vorr q3, q4, q4
+; LE-I64-NEON-NEXT: add sp, sp, #8
+; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: add sp, sp, #4
+; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v8fp128:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEXT: .pad #4
+; BE-I32-NEXT: sub sp, sp, #4
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11}
+; BE-I32-NEXT: .pad #8
+; BE-I32-NEXT: sub sp, sp, #8
+; BE-I32-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; BE-I32-NEXT: add r3, sp, #128
+; BE-I32-NEXT: mov r11, r2
+; BE-I32-NEXT: mov r6, r1
+; BE-I32-NEXT: mov r7, r0
+; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #100
+; BE-I32-NEXT: ldr r5, [sp, #96]
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: ldr r4, [sp, #160]
+; BE-I32-NEXT: ldm r3, {r1, r2, r3}
+; BE-I32-NEXT: mov r0, r5
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #164
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: mov r0, r4
+; BE-I32-NEXT: ldm r3, {r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: ldr r4, [sp, #176]
+; BE-I32-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEXT: ldr r1, [sp, #180]
+; BE-I32-NEXT: ldr r2, [sp, #184]
+; BE-I32-NEXT: ldr r3, [sp, #188]
+; BE-I32-NEXT: mov r0, r4
+; BE-I32-NEXT: ldr r5, [sp, #116]
+; BE-I32-NEXT: ldr r8, [sp, #120]
+; BE-I32-NEXT: ldr r10, [sp, #84]
+; BE-I32-NEXT: ldr r9, [sp, #88]
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEXT: ldr r3, [sp, #124]
+; BE-I32-NEXT: ldr r0, [sp, #112]
+; BE-I32-NEXT: mov r1, r5
+; BE-I32-NEXT: mov r2, r8
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: ldr r3, [sp, #92]
+; BE-I32-NEXT: ldr r0, [sp, #80]
+; BE-I32-NEXT: mov r1, r10
+; BE-I32-NEXT: mov r2, r9
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: mov r0, r7
+; BE-I32-NEXT: mov r1, r6
+; BE-I32-NEXT: mov r2, r11
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #148
+; BE-I32-NEXT: ldr r7, [sp, #144]
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: ldm r3, {r1, r2, r3}
+; BE-I32-NEXT: mov r0, r7
+; BE-I32-NEXT: vmov.32 d10[1], r4
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q5
+; BE-I32-NEXT: vrev64.32 q1, q4
+; BE-I32-NEXT: add sp, sp, #8
+; BE-I32-NEXT: vpop {d8, d9, d10, d11}
+; BE-I32-NEXT: add sp, sp, #4
+; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I64-LABEL: lrint_v8fp128:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: .pad #4
+; BE-I64-NEXT: sub sp, sp, #4
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT: .pad #16
+; BE-I64-NEXT: sub sp, sp, #16
+; BE-I64-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; BE-I64-NEXT: add r3, sp, #208
+; BE-I64-NEXT: mov r11, r2
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: mov r5, r0
+; BE-I64-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: ldr r7, [sp, #176]
+; BE-I64-NEXT: add r3, sp, #180
+; BE-I64-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; BE-I64-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: mov r0, r7
+; BE-I64-NEXT: ldr r6, [sp, #128]
+; BE-I64-NEXT: ldr r8, [sp, #144]
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #132
+; BE-I64-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: mov r0, r6
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #148
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: mov r0, r8
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #160
+; BE-I64-NEXT: mov r9, r0
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: mov r0, r5
+; BE-I64-NEXT: mov r1, r4
+; BE-I64-NEXT: mov r2, r11
+; BE-I64-NEXT: ldr r10, [sp, #112]
+; BE-I64-NEXT: vmov.32 d12[0], r9
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #116
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: mov r0, r10
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #196
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #192]
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; BE-I64-NEXT: vmov.32 d14[1], r5
+; BE-I64-NEXT: vmov.32 d9[1], r0
+; BE-I64-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; BE-I64-NEXT: vmov.32 d12[1], r7
+; BE-I64-NEXT: vmov.32 d8[1], r0
+; BE-I64-NEXT: vmov.32 d13[1], r4
+; BE-I64-NEXT: vmov.32 d10[1], r6
+; BE-I64-NEXT: vmov.32 d11[1], r8
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d14
+; BE-I64-NEXT: vrev64.32 d3, d12
+; BE-I64-NEXT: vrev64.32 d5, d9
+; BE-I64-NEXT: vrev64.32 d7, d8
+; BE-I64-NEXT: vrev64.32 d0, d13
+; BE-I64-NEXT: vrev64.32 d2, d10
+; BE-I64-NEXT: vrev64.32 d4, d11
+; BE-I64-NEXT: vrev64.32 d6, d16
+; BE-I64-NEXT: add sp, sp, #16
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT: add sp, sp, #4
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v8fp128:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEON-NEXT: .pad #4
+; BE-I32-NEON-NEXT: sub sp, sp, #4
+; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11}
+; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11}
+; BE-I32-NEON-NEXT: .pad #8
+; BE-I32-NEON-NEXT: sub sp, sp, #8
+; BE-I32-NEON-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; BE-I32-NEON-NEXT: add r3, sp, #128
+; BE-I32-NEON-NEXT: mov r11, r2
+; BE-I32-NEON-NEXT: mov r6, r1
+; BE-I32-NEON-NEXT: mov r7, r0
+; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #100
+; BE-I32-NEON-NEXT: ldr r5, [sp, #96]
+; BE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT: ldr r4, [sp, #160]
+; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT: mov r0, r5
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #164
+; BE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT: mov r0, r4
+; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: ldr r4, [sp, #176]
+; BE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT: ldr r1, [sp, #180]
+; BE-I32-NEON-NEXT: ldr r2, [sp, #184]
+; BE-I32-NEON-NEXT: ldr r3, [sp, #188]
+; BE-I32-NEON-NEXT: mov r0, r4
+; BE-I32-NEON-NEXT: ldr r5, [sp, #116]
+; BE-I32-NEON-NEXT: ldr r8, [sp, #120]
+; BE-I32-NEON-NEXT: ldr r10, [sp, #84]
+; BE-I32-NEON-NEXT: ldr r9, [sp, #88]
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT: ldr r3, [sp, #124]
+; BE-I32-NEON-NEXT: ldr r0, [sp, #112]
+; BE-I32-NEON-NEXT: mov r1, r5
+; BE-I32-NEON-NEXT: mov r2, r8
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT: ldr r3, [sp, #92]
+; BE-I32-NEON-NEXT: ldr r0, [sp, #80]
+; BE-I32-NEON-NEXT: mov r1, r10
+; BE-I32-NEON-NEXT: mov r2, r9
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
+; BE-I32-NEON-NEXT: mov r4, r0
+; BE-I32-NEON-NEXT: mov r0, r7
+; BE-I32-NEON-NEXT: mov r1, r6
+; BE-I32-NEON-NEXT: mov r2, r11
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #148
+; BE-I32-NEON-NEXT: ldr r7, [sp, #144]
+; BE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT: mov r0, r7
+; BE-I32-NEON-NEXT: vmov.32 d10[1], r4
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT: vrev64.32 q0, q5
+; BE-I32-NEON-NEXT: vrev64.32 q1, q4
+; BE-I32-NEON-NEXT: add sp, sp, #8
+; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11}
+; BE-I32-NEON-NEXT: add sp, sp, #4
+; BE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v8fp128:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: .pad #4
+; BE-I64-NEON-NEXT: sub sp, sp, #4
+; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEON-NEXT: .pad #16
+; BE-I64-NEON-NEXT: sub sp, sp, #16
+; BE-I64-NEON-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; BE-I64-NEON-NEXT: add r3, sp, #208
+; BE-I64-NEON-NEXT: mov r11, r2
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: mov r5, r0
+; BE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: ldr r7, [sp, #176]
+; BE-I64-NEON-NEXT: add r3, sp, #180
+; BE-I64-NEON-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: mov r0, r7
+; BE-I64-NEON-NEXT: ldr r6, [sp, #128]
+; BE-I64-NEON-NEXT: ldr r8, [sp, #144]
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #132
+; BE-I64-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT: mov r0, r6
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #148
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: mov r0, r8
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #160
+; BE-I64-NEON-NEXT: mov r9, r0
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
+; BE-I64-NEON-NEXT: mov r8, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: mov r0, r5
+; BE-I64-NEON-NEXT: mov r1, r4
+; BE-I64-NEON-NEXT: mov r2, r11
+; BE-I64-NEON-NEXT: ldr r10, [sp, #112]
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r9
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #116
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: mov r0, r10
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #196
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #192]
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r5
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r7
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r0
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r4
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r6
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r8
+; BE-I64-NEON-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 d1, d14
+; BE-I64-NEON-NEXT: vrev64.32 d3, d12
+; BE-I64-NEON-NEXT: vrev64.32 d5, d9
+; BE-I64-NEON-NEXT: vrev64.32 d7, d8
+; BE-I64-NEON-NEXT: vrev64.32 d0, d13
+; BE-I64-NEON-NEXT: vrev64.32 d2, d10
+; BE-I64-NEON-NEXT: vrev64.32 d4, d11
+; BE-I64-NEON-NEXT: vrev64.32 d6, d16
+; BE-I64-NEON-NEXT: add sp, sp, #16
+; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEON-NEXT: add sp, sp, #4
+; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x)
+ ret <8 x iXLen> %a
+}
+declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>)
+
+define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) {
+; LE-I32-LABEL: lrint_v16fp128:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEXT: .pad #4
+; LE-I32-NEXT: sub sp, sp, #4
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: mov r8, r3
+; LE-I32-NEXT: add r3, sp, #280
+; LE-I32-NEXT: mov r9, r2
+; LE-I32-NEXT: mov r10, r1
+; LE-I32-NEXT: mov r6, r0
+; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: ldr r4, [sp, #216]
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: ldr r1, [sp, #220]
+; LE-I32-NEXT: ldr r2, [sp, #224]
+; LE-I32-NEXT: ldr r3, [sp, #228]
+; LE-I32-NEXT: mov r0, r4
+; LE-I32-NEXT: ldr r7, [sp, #152]
+; LE-I32-NEXT: ldr r11, [sp, #104]
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #156
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: mov r0, r7
+; LE-I32-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: ldr r7, [sp, #184]
+; LE-I32-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEXT: ldr r1, [sp, #188]
+; LE-I32-NEXT: ldr r2, [sp, #192]
+; LE-I32-NEXT: ldr r3, [sp, #196]
+; LE-I32-NEXT: mov r0, r7
+; LE-I32-NEXT: ldr r4, [sp, #120]
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #124
+; LE-I32-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEXT: mov r0, r4
+; LE-I32-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: ldr r5, [sp, #136]
+; LE-I32-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEXT: ldr r1, [sp, #140]
+; LE-I32-NEXT: ldr r2, [sp, #144]
+; LE-I32-NEXT: ldr r3, [sp, #148]
+; LE-I32-NEXT: mov r0, r5
+; LE-I32-NEXT: ldr r4, [sp, #108]
+; LE-I32-NEXT: ldr r7, [sp, #112]
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: ldr r3, [sp, #116]
+; LE-I32-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEXT: mov r0, r11
+; LE-I32-NEXT: mov r1, r4
+; LE-I32-NEXT: mov r2, r7
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: mov r0, r6
+; LE-I32-NEXT: mov r1, r10
+; LE-I32-NEXT: mov r2, r9
+; LE-I32-NEXT: mov r3, r8
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: ldr r7, [sp, #200]
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: ldr r1, [sp, #204]
+; LE-I32-NEXT: ldr r2, [sp, #208]
+; LE-I32-NEXT: ldr r3, [sp, #212]
+; LE-I32-NEXT: mov r0, r7
+; LE-I32-NEXT: ldr r5, [sp, #172]
+; LE-I32-NEXT: vmov.32 d14[1], r4
+; LE-I32-NEXT: ldr r6, [sp, #176]
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEXT: ldr r3, [sp, #180]
+; LE-I32-NEXT: ldr r0, [sp, #168]
+; LE-I32-NEXT: mov r1, r5
+; LE-I32-NEXT: mov r2, r6
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #248
+; LE-I32-NEXT: mov r5, r0
+; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: ldr r4, [sp, #264]
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: ldr r1, [sp, #268]
+; LE-I32-NEXT: ldr r2, [sp, #272]
+; LE-I32-NEXT: vmov.32 d12[1], r5
+; LE-I32-NEXT: ldr r3, [sp, #276]
+; LE-I32-NEXT: mov r0, r4
+; LE-I32-NEXT: ldr r6, [sp, #236]
+; LE-I32-NEXT: ldr r7, [sp, #240]
+; LE-I32-NEXT: ldr r8, [sp, #332]
+; LE-I32-NEXT: ldr r5, [sp, #336]
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEXT: ldr r3, [sp, #244]
+; LE-I32-NEXT: ldr r0, [sp, #232]
+; LE-I32-NEXT: mov r1, r6
+; LE-I32-NEXT: mov r2, r7
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: ldr r3, [sp, #340]
+; LE-I32-NEXT: ldr r0, [sp, #328]
+; LE-I32-NEXT: mov r1, r8
+; LE-I32-NEXT: mov r2, r5
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #312
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #300
+; LE-I32-NEXT: ldr r7, [sp, #296]
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEXT: mov r0, r7
+; LE-I32-NEXT: vmov.32 d9[1], r4
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: vorr q0, q7, q7
+; LE-I32-NEXT: vorr q1, q6, q6
+; LE-I32-NEXT: vorr q2, q5, q5
+; LE-I32-NEXT: vorr q3, q4, q4
+; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: add sp, sp, #4
+; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I64-LABEL: lrint_v16fp128:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: .pad #4
+; LE-I64-NEXT: sub sp, sp, #4
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: .pad #72
+; LE-I64-NEXT: sub sp, sp, #72
+; LE-I64-NEXT: mov r6, r3
+; LE-I64-NEXT: add r3, sp, #408
+; LE-I64-NEXT: mov r7, r2
+; LE-I64-NEXT: mov r4, r0
+; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r5, sp, #176
+; LE-I64-NEXT: mov r10, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: mov r0, r7
+; LE-I64-NEXT: ldm r5, {r2, r3, r5}
+; LE-I64-NEXT: mov r1, r6
+; LE-I64-NEXT: ldr r8, [sp, #232]
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #188
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: mov r0, r5
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #236
+; LE-I64-NEXT: mov r11, r1
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: mov r0, r8
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #252
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #248]
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #268
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #264]
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #284
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #280]
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #316
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #312]
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: vmov.32 d15[1], r5
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: ldr r5, [sp, #300]
+; LE-I64-NEXT: vmov.32 d14[1], r7
+; LE-I64-NEXT: ldr r2, [sp, #304]
+; LE-I64-NEXT: ldr r3, [sp, #308]
+; LE-I64-NEXT: vmov.32 d11[1], r6
+; LE-I64-NEXT: ldr r6, [sp, #200]
+; LE-I64-NEXT: ldr r7, [sp, #204]
+; LE-I64-NEXT: vmov.32 d10[1], r8
+; LE-I64-NEXT: ldr r8, [sp, #344]
+; LE-I64-NEXT: vmov.32 d9[1], r11
+; LE-I64-NEXT: ldr r11, [sp, #216]
+; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: vmov.32 d17[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #296]
+; LE-I64-NEXT: vmov.32 d8[1], r9
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: vorr q5, q8, q8
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: vorr q4, q6, q6
+; LE-I64-NEXT: vmov.32 d11[1], r1
+; LE-I64-NEXT: mov r1, r5
+; LE-I64-NEXT: vmov.32 d9[1], r10
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: ldr r2, [sp, #208]
+; LE-I64-NEXT: ldr r3, [sp, #212]
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: mov r0, r6
+; LE-I64-NEXT: mov r1, r7
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #220
+; LE-I64-NEXT: mov r10, r1
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: mov r0, r11
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #348
+; LE-I64-NEXT: mov r11, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: mov r0, r8
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #364
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #360]
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #380
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #376]
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #396
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #392]
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #332
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #328]
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: add r0, r4, #64
+; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: vmov.32 d13[1], r8
+; LE-I64-NEXT: vmov.32 d18[1], r9
+; LE-I64-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEXT: vmov.32 d12[1], r1
+; LE-I64-NEXT: vmov.32 d14[1], r5
+; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEXT: vmov.32 d8[1], r7
+; LE-I64-NEXT: vst1.64 {d14, d15}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d8, d9}, [r0:128]
+; LE-I64-NEXT: vmov.32 d11[1], r11
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: vmov.32 d10[1], r10
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r4:128]!
+; LE-I64-NEXT: vst1.64 {d10, d11}, [r4:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r4:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r4:128]
+; LE-I64-NEXT: add sp, sp, #72
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: add sp, sp, #4
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v16fp128:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEON-NEXT: .pad #4
+; LE-I32-NEON-NEXT: sub sp, sp, #4
+; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: mov r8, r3
+; LE-I32-NEON-NEXT: add r3, sp, #280
+; LE-I32-NEON-NEXT: mov r9, r2
+; LE-I32-NEON-NEXT: mov r10, r1
+; LE-I32-NEON-NEXT: mov r6, r0
+; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: ldr r4, [sp, #216]
+; LE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT: ldr r1, [sp, #220]
+; LE-I32-NEON-NEXT: ldr r2, [sp, #224]
+; LE-I32-NEON-NEXT: ldr r3, [sp, #228]
+; LE-I32-NEON-NEXT: mov r0, r4
+; LE-I32-NEON-NEXT: ldr r7, [sp, #152]
+; LE-I32-NEON-NEXT: ldr r11, [sp, #104]
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #156
+; LE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT: mov r0, r7
+; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: ldr r7, [sp, #184]
+; LE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEON-NEXT: ldr r1, [sp, #188]
+; LE-I32-NEON-NEXT: ldr r2, [sp, #192]
+; LE-I32-NEON-NEXT: ldr r3, [sp, #196]
+; LE-I32-NEON-NEXT: mov r0, r7
+; LE-I32-NEON-NEXT: ldr r4, [sp, #120]
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #124
+; LE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEON-NEXT: mov r0, r4
+; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: ldr r5, [sp, #136]
+; LE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT: ldr r1, [sp, #140]
+; LE-I32-NEON-NEXT: ldr r2, [sp, #144]
+; LE-I32-NEON-NEXT: ldr r3, [sp, #148]
+; LE-I32-NEON-NEXT: mov r0, r5
+; LE-I32-NEON-NEXT: ldr r4, [sp, #108]
+; LE-I32-NEON-NEXT: ldr r7, [sp, #112]
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: ldr r3, [sp, #116]
+; LE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT: mov r0, r11
+; LE-I32-NEON-NEXT: mov r1, r4
+; LE-I32-NEON-NEXT: mov r2, r7
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: mov r4, r0
+; LE-I32-NEON-NEXT: mov r0, r6
+; LE-I32-NEON-NEXT: mov r1, r10
+; LE-I32-NEON-NEXT: mov r2, r9
+; LE-I32-NEON-NEXT: mov r3, r8
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: ldr r7, [sp, #200]
+; LE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT: ldr r1, [sp, #204]
+; LE-I32-NEON-NEXT: ldr r2, [sp, #208]
+; LE-I32-NEON-NEXT: ldr r3, [sp, #212]
+; LE-I32-NEON-NEXT: mov r0, r7
+; LE-I32-NEON-NEXT: ldr r5, [sp, #172]
+; LE-I32-NEON-NEXT: vmov.32 d14[1], r4
+; LE-I32-NEON-NEXT: ldr r6, [sp, #176]
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT: ldr r3, [sp, #180]
+; LE-I32-NEON-NEXT: ldr r0, [sp, #168]
+; LE-I32-NEON-NEXT: mov r1, r5
+; LE-I32-NEON-NEXT: mov r2, r6
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #248
+; LE-I32-NEON-NEXT: mov r5, r0
+; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: ldr r4, [sp, #264]
+; LE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT: ldr r1, [sp, #268]
+; LE-I32-NEON-NEXT: ldr r2, [sp, #272]
+; LE-I32-NEON-NEXT: vmov.32 d12[1], r5
+; LE-I32-NEON-NEXT: ldr r3, [sp, #276]
+; LE-I32-NEON-NEXT: mov r0, r4
+; LE-I32-NEON-NEXT: ldr r6, [sp, #236]
+; LE-I32-NEON-NEXT: ldr r7, [sp, #240]
+; LE-I32-NEON-NEXT: ldr r8, [sp, #332]
+; LE-I32-NEON-NEXT: ldr r5, [sp, #336]
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT: ldr r3, [sp, #244]
+; LE-I32-NEON-NEXT: ldr r0, [sp, #232]
+; LE-I32-NEON-NEXT: mov r1, r6
+; LE-I32-NEON-NEXT: mov r2, r7
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT: ldr r3, [sp, #340]
+; LE-I32-NEON-NEXT: ldr r0, [sp, #328]
+; LE-I32-NEON-NEXT: mov r1, r8
+; LE-I32-NEON-NEXT: mov r2, r5
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #312
+; LE-I32-NEON-NEXT: mov r4, r0
+; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #300
+; LE-I32-NEON-NEXT: ldr r7, [sp, #296]
+; LE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT: mov r0, r7
+; LE-I32-NEON-NEXT: vmov.32 d9[1], r4
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT: vorr q0, q7, q7
+; LE-I32-NEON-NEXT: vorr q1, q6, q6
+; LE-I32-NEON-NEXT: vorr q2, q5, q5
+; LE-I32-NEON-NEXT: vorr q3, q4, q4
+; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: add sp, sp, #4
+; LE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v16fp128:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: .pad #4
+; LE-I64-NEON-NEXT: sub sp, sp, #4
+; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: .pad #72
+; LE-I64-NEON-NEXT: sub sp, sp, #72
+; LE-I64-NEON-NEXT: mov r6, r3
+; LE-I64-NEON-NEXT: add r3, sp, #408
+; LE-I64-NEON-NEXT: mov r7, r2
+; LE-I64-NEON-NEXT: mov r4, r0
+; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r5, sp, #176
+; LE-I64-NEON-NEXT: mov r10, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: mov r0, r7
+; LE-I64-NEON-NEXT: ldm r5, {r2, r3, r5}
+; LE-I64-NEON-NEXT: mov r1, r6
+; LE-I64-NEON-NEXT: ldr r8, [sp, #232]
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #188
+; LE-I64-NEON-NEXT: mov r9, r1
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: mov r0, r5
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #236
+; LE-I64-NEON-NEXT: mov r11, r1
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: mov r0, r8
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #252
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #248]
+; LE-I64-NEON-NEXT: mov r8, r1
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #268
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #264]
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #284
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #280]
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #316
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #312]
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r5
+; LE-I64-NEON-NEXT: add lr, sp, #56
+; LE-I64-NEON-NEXT: ldr r5, [sp, #300]
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r7
+; LE-I64-NEON-NEXT: ldr r2, [sp, #304]
+; LE-I64-NEON-NEXT: ldr r3, [sp, #308]
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r6
+; LE-I64-NEON-NEXT: ldr r6, [sp, #200]
+; LE-I64-NEON-NEXT: ldr r7, [sp, #204]
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r8
+; LE-I64-NEON-NEXT: ldr r8, [sp, #344]
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r11
+; LE-I64-NEON-NEXT: ldr r11, [sp, #216]
+; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #40
+; LE-I64-NEON-NEXT: vmov.32 d17[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #296]
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r9
+; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #24
+; LE-I64-NEON-NEXT: vorr q5, q8, q8
+; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT: vorr q4, q6, q6
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r1
+; LE-I64-NEON-NEXT: mov r1, r5
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r10
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: ldr r2, [sp, #208]
+; LE-I64-NEON-NEXT: ldr r3, [sp, #212]
+; LE-I64-NEON-NEXT: add lr, sp, #8
+; LE-I64-NEON-NEXT: mov r9, r1
+; LE-I64-NEON-NEXT: mov r0, r6
+; LE-I64-NEON-NEXT: mov r1, r7
+; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #220
+; LE-I64-NEON-NEXT: mov r10, r1
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: mov r0, r11
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #348
+; LE-I64-NEON-NEXT: mov r11, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: mov r0, r8
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #364
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #360]
+; LE-I64-NEON-NEXT: mov r8, r1
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #380
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #376]
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #396
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #392]
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #332
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #328]
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add lr, sp, #8
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: add r0, r4, #64
+; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #24
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r8
+; LE-I64-NEON-NEXT: vmov.32 d18[1], r9
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r1
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r5
+; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r7
+; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r11
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #40
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r10
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r4:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #56
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]
+; LE-I64-NEON-NEXT: add sp, sp, #72
+; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: add sp, sp, #4
+; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v16fp128:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEXT: .pad #4
+; BE-I32-NEXT: sub sp, sp, #4
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: .pad #16
+; BE-I32-NEXT: sub sp, sp, #16
+; BE-I32-NEXT: stm sp, {r0, r1, r2, r3} @ 16-byte Folded Spill
+; BE-I32-NEXT: add r3, sp, #264
+; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #332
+; BE-I32-NEXT: ldr r7, [sp, #328]
+; BE-I32-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEXT: ldr r10, [sp, #280]
+; BE-I32-NEXT: ldm r3, {r1, r2, r3}
+; BE-I32-NEXT: mov r0, r7
+; BE-I32-NEXT: ldr r8, [sp, #168]
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: ldr r5, [sp, #344]
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: ldr r1, [sp, #348]
+; BE-I32-NEXT: ldr r2, [sp, #352]
+; BE-I32-NEXT: ldr r3, [sp, #356]
+; BE-I32-NEXT: mov r0, r5
+; BE-I32-NEXT: ldr r7, [sp, #284]
+; BE-I32-NEXT: ldr r4, [sp, #288]
+; BE-I32-NEXT: ldr r6, [sp, #172]
+; BE-I32-NEXT: ldr r9, [sp, #176]
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: ldr r3, [sp, #292]
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: mov r0, r10
+; BE-I32-NEXT: mov r1, r7
+; BE-I32-NEXT: mov r2, r4
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: ldr r3, [sp, #180]
+; BE-I32-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEXT: mov r0, r8
+; BE-I32-NEXT: mov r1, r6
+; BE-I32-NEXT: mov r2, r9
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #232
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #136
+; BE-I32-NEXT: mov r6, r0
+; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: ldr r5, [sp, #296]
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: ldr r1, [sp, #300]
+; BE-I32-NEXT: ldr r2, [sp, #304]
+; BE-I32-NEXT: ldr r3, [sp, #308]
+; BE-I32-NEXT: mov r0, r5
+; BE-I32-NEXT: ldr r10, [sp, #216]
+; BE-I32-NEXT: ldr r8, [sp, #220]
+; BE-I32-NEXT: ldr r9, [sp, #152]
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: ldr r7, [sp, #248]
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: ldr r1, [sp, #252]
+; BE-I32-NEXT: ldr r2, [sp, #256]
+; BE-I32-NEXT: vmov.32 d8[0], r6
+; BE-I32-NEXT: ldr r3, [sp, #260]
+; BE-I32-NEXT: mov r0, r7
+; BE-I32-NEXT: ldr r5, [sp, #224]
+; BE-I32-NEXT: ldr r11, [sp, #120]
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: ldr r3, [sp, #228]
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: mov r0, r10
+; BE-I32-NEXT: mov r1, r8
+; BE-I32-NEXT: mov r2, r5
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #200
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEXT: ldr r0, [sp, #184]
+; BE-I32-NEXT: ldr r1, [sp, #188]
+; BE-I32-NEXT: ldr r2, [sp, #192]
+; BE-I32-NEXT: vmov.32 d14[0], r4
+; BE-I32-NEXT: ldr r3, [sp, #196]
+; BE-I32-NEXT: vmov.32 d15[1], r5
+; BE-I32-NEXT: ldr r7, [sp, #156]
+; BE-I32-NEXT: ldr r6, [sp, #160]
+; BE-I32-NEXT: ldr r4, [sp, #124]
+; BE-I32-NEXT: ldr r5, [sp, #128]
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: ldr r3, [sp, #164]
+; BE-I32-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEXT: mov r0, r9
+; BE-I32-NEXT: mov r1, r7
+; BE-I32-NEXT: mov r2, r6
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: ldr r3, [sp, #132]
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: mov r0, r11
+; BE-I32-NEXT: mov r1, r4
+; BE-I32-NEXT: mov r2, r5
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: ldm sp, {r0, r1, r2, r3} @ 16-byte Folded Reload
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #316
+; BE-I32-NEXT: ldr r7, [sp, #312]
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: ldm r3, {r1, r2, r3}
+; BE-I32-NEXT: mov r0, r7
+; BE-I32-NEXT: vmov.32 d12[1], r4
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q6
+; BE-I32-NEXT: vrev64.32 q1, q7
+; BE-I32-NEXT: vrev64.32 q2, q4
+; BE-I32-NEXT: vrev64.32 q3, q5
+; BE-I32-NEXT: add sp, sp, #16
+; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: add sp, sp, #4
+; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I64-LABEL: lrint_v16fp128:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: .pad #4
+; BE-I64-NEXT: sub sp, sp, #4
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: .pad #56
+; BE-I64-NEXT: sub sp, sp, #56
+; BE-I64-NEXT: mov r5, r3
+; BE-I64-NEXT: add r3, sp, #376
+; BE-I64-NEXT: mov r6, r2
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: ldr r7, [sp, #392]
+; BE-I64-NEXT: add r3, sp, #396
+; BE-I64-NEXT: mov r9, r1
+; BE-I64-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: mov r0, r7
+; BE-I64-NEXT: ldr r11, [sp, #168]
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: ldr r2, [sp, #160]
+; BE-I64-NEXT: mov r10, r1
+; BE-I64-NEXT: ldr r3, [sp, #164]
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: mov r0, r6
+; BE-I64-NEXT: mov r1, r5
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #172
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: mov r0, r11
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #220
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #216]
+; BE-I64-NEXT: mov r11, r1
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #236
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #232]
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #252
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #248]
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #268
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #264]
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #280]
+; BE-I64-NEXT: ldr r2, [sp, #288]
+; BE-I64-NEXT: vmov.32 d13[1], r7
+; BE-I64-NEXT: ldr r7, [sp, #284]
+; BE-I64-NEXT: ldr r3, [sp, #292]
+; BE-I64-NEXT: vmov.32 d14[1], r5
+; BE-I64-NEXT: ldr r5, [sp, #328]
+; BE-I64-NEXT: vmov.32 d12[1], r6
+; BE-I64-NEXT: ldr r6, [sp, #300]
+; BE-I64-NEXT: vmov.32 d10[1], r8
+; BE-I64-NEXT: ldr r8, [sp, #184]
+; BE-I64-NEXT: vmov.32 d11[1], r11
+; BE-I64-NEXT: vmov.32 d9[1], r10
+; BE-I64-NEXT: vmov.32 d8[1], r9
+; BE-I64-NEXT: vmov.32 d15[1], r1
+; BE-I64-NEXT: mov r1, r7
+; BE-I64-NEXT: vstr d14, [sp, #48] @ 8-byte Spill
+; BE-I64-NEXT: vstr d13, [sp, #40] @ 8-byte Spill
+; BE-I64-NEXT: vstr d12, [sp, #32] @ 8-byte Spill
+; BE-I64-NEXT: vstr d11, [sp, #24] @ 8-byte Spill
+; BE-I64-NEXT: vstr d10, [sp, #16] @ 8-byte Spill
+; BE-I64-NEXT: vstr d9, [sp, #8] @ 8-byte Spill
+; BE-I64-NEXT: vstr d8, [sp] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: mov r10, r1
+; BE-I64-NEXT: ldr r1, [sp, #296]
+; BE-I64-NEXT: ldr r2, [sp, #304]
+; BE-I64-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEXT: ldr r3, [sp, #308]
+; BE-I64-NEXT: mov r0, r1
+; BE-I64-NEXT: mov r1, r6
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #332
+; BE-I64-NEXT: mov r11, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: mov r0, r5
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #188
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: mov r0, r8
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #204
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #200]
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #348
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #344]
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #364
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #360]
+; BE-I64-NEXT: mov r9, r1
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #316
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #312]
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: vldr d18, [sp, #48] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d17, d15
+; BE-I64-NEXT: vrev64.32 d16, d18
+; BE-I64-NEXT: vldr d18, [sp, #40] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d24[0], r0
+; BE-I64-NEXT: add r0, r4, #64
+; BE-I64-NEXT: vldr d20, [sp, #32] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d19, d18
+; BE-I64-NEXT: vmov.32 d9[1], r11
+; BE-I64-NEXT: vmov.32 d10[1], r7
+; BE-I64-NEXT: vrev64.32 d18, d20
+; BE-I64-NEXT: vldr d20, [sp, #24] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d8[1], r10
+; BE-I64-NEXT: vmov.32 d14[1], r6
+; BE-I64-NEXT: vmov.32 d24[1], r1
+; BE-I64-NEXT: vldr d22, [sp, #16] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d21, d20
+; BE-I64-NEXT: vrev64.32 d1, d9
+; BE-I64-NEXT: vmov.32 d13[1], r9
+; BE-I64-NEXT: vrev64.32 d31, d10
+; BE-I64-NEXT: vrev64.32 d20, d22
+; BE-I64-NEXT: vldr d22, [sp, #8] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d0, d8
+; BE-I64-NEXT: vrev64.32 d29, d14
+; BE-I64-NEXT: vmov.32 d12[1], r5
+; BE-I64-NEXT: vrev64.32 d30, d24
+; BE-I64-NEXT: vrev64.32 d27, d22
+; BE-I64-NEXT: vldr d22, [sp] @ 8-byte Reload
+; BE-I64-NEXT: vst1.64 {d0, d1}, [r0:128]!
+; BE-I64-NEXT: vmov.32 d11[1], r8
+; BE-I64-NEXT: vrev64.32 d28, d13
+; BE-I64-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEXT: vrev64.32 d26, d22
+; BE-I64-NEXT: vrev64.32 d23, d12
+; BE-I64-NEXT: vst1.64 {d28, d29}, [r0:128]!
+; BE-I64-NEXT: vrev64.32 d22, d11
+; BE-I64-NEXT: vst1.64 {d26, d27}, [r0:128]
+; BE-I64-NEXT: vst1.64 {d20, d21}, [r4:128]!
+; BE-I64-NEXT: vst1.64 {d22, d23}, [r4:128]!
+; BE-I64-NEXT: vst1.64 {d18, d19}, [r4:128]!
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r4:128]
+; BE-I64-NEXT: add sp, sp, #56
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: add sp, sp, #4
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v16fp128:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEON-NEXT: .pad #4
+; BE-I32-NEON-NEXT: sub sp, sp, #4
+; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: .pad #16
+; BE-I32-NEON-NEXT: sub sp, sp, #16
+; BE-I32-NEON-NEXT: stm sp, {r0, r1, r2, r3} @ 16-byte Folded Spill
+; BE-I32-NEON-NEXT: add r3, sp, #264
+; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #332
+; BE-I32-NEON-NEXT: ldr r7, [sp, #328]
+; BE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT: ldr r10, [sp, #280]
+; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT: mov r0, r7
+; BE-I32-NEON-NEXT: ldr r8, [sp, #168]
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: ldr r5, [sp, #344]
+; BE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT: ldr r1, [sp, #348]
+; BE-I32-NEON-NEXT: ldr r2, [sp, #352]
+; BE-I32-NEON-NEXT: ldr r3, [sp, #356]
+; BE-I32-NEON-NEXT: mov r0, r5
+; BE-I32-NEON-NEXT: ldr r7, [sp, #284]
+; BE-I32-NEON-NEXT: ldr r4, [sp, #288]
+; BE-I32-NEON-NEXT: ldr r6, [sp, #172]
+; BE-I32-NEON-NEXT: ldr r9, [sp, #176]
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: ldr r3, [sp, #292]
+; BE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT: mov r0, r10
+; BE-I32-NEON-NEXT: mov r1, r7
+; BE-I32-NEON-NEXT: mov r2, r4
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: ldr r3, [sp, #180]
+; BE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT: mov r0, r8
+; BE-I32-NEON-NEXT: mov r1, r6
+; BE-I32-NEON-NEXT: mov r2, r9
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #232
+; BE-I32-NEON-NEXT: mov r4, r0
+; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #136
+; BE-I32-NEON-NEXT: mov r6, r0
+; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: ldr r5, [sp, #296]
+; BE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT: ldr r1, [sp, #300]
+; BE-I32-NEON-NEXT: ldr r2, [sp, #304]
+; BE-I32-NEON-NEXT: ldr r3, [sp, #308]
+; BE-I32-NEON-NEXT: mov r0, r5
+; BE-I32-NEON-NEXT: ldr r10, [sp, #216]
+; BE-I32-NEON-NEXT: ldr r8, [sp, #220]
+; BE-I32-NEON-NEXT: ldr r9, [sp, #152]
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: ldr r7, [sp, #248]
+; BE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT: ldr r1, [sp, #252]
+; BE-I32-NEON-NEXT: ldr r2, [sp, #256]
+; BE-I32-NEON-NEXT: vmov.32 d8[0], r6
+; BE-I32-NEON-NEXT: ldr r3, [sp, #260]
+; BE-I32-NEON-NEXT: mov r0, r7
+; BE-I32-NEON-NEXT: ldr r5, [sp, #224]
+; BE-I32-NEON-NEXT: ldr r11, [sp, #120]
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: ldr r3, [sp, #228]
+; BE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT: mov r0, r10
+; BE-I32-NEON-NEXT: mov r1, r8
+; BE-I32-NEON-NEXT: mov r2, r5
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #200
+; BE-I32-NEON-NEXT: mov r5, r0
+; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT: ldr r0, [sp, #184]
+; BE-I32-NEON-NEXT: ldr r1, [sp, #188]
+; BE-I32-NEON-NEXT: ldr r2, [sp, #192]
+; BE-I32-NEON-NEXT: vmov.32 d14[0], r4
+; BE-I32-NEON-NEXT: ldr r3, [sp, #196]
+; BE-I32-NEON-NEXT: vmov.32 d15[1], r5
+; BE-I32-NEON-NEXT: ldr r7, [sp, #156]
+; BE-I32-NEON-NEXT: ldr r6, [sp, #160]
+; BE-I32-NEON-NEXT: ldr r4, [sp, #124]
+; BE-I32-NEON-NEXT: ldr r5, [sp, #128]
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: ldr r3, [sp, #164]
+; BE-I32-NEON-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEON-NEXT: mov r0, r9
+; BE-I32-NEON-NEXT: mov r1, r7
+; BE-I32-NEON-NEXT: mov r2, r6
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: ldr r3, [sp, #132]
+; BE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT: mov r0, r11
+; BE-I32-NEON-NEXT: mov r1, r4
+; BE-I32-NEON-NEXT: mov r2, r5
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: mov r4, r0
+; BE-I32-NEON-NEXT: ldm sp, {r0, r1, r2, r3} @ 16-byte Folded Reload
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #316
+; BE-I32-NEON-NEXT: ldr r7, [sp, #312]
+; BE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT: mov r0, r7
+; BE-I32-NEON-NEXT: vmov.32 d12[1], r4
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT: vrev64.32 q0, q6
+; BE-I32-NEON-NEXT: vrev64.32 q1, q7
+; BE-I32-NEON-NEXT: vrev64.32 q2, q4
+; BE-I32-NEON-NEXT: vrev64.32 q3, q5
+; BE-I32-NEON-NEXT: add sp, sp, #16
+; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: add sp, sp, #4
+; BE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v16fp128:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: .pad #4
+; BE-I64-NEON-NEXT: sub sp, sp, #4
+; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: .pad #56
+; BE-I64-NEON-NEXT: sub sp, sp, #56
+; BE-I64-NEON-NEXT: mov r5, r3
+; BE-I64-NEON-NEXT: add r3, sp, #376
+; BE-I64-NEON-NEXT: mov r6, r2
+; BE-I64-NEON-NEXT: mov r4, r0
+; BE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: ldr r7, [sp, #392]
+; BE-I64-NEON-NEXT: add r3, sp, #396
+; BE-I64-NEON-NEXT: mov r9, r1
+; BE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: mov r0, r7
+; BE-I64-NEON-NEXT: ldr r11, [sp, #168]
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: ldr r2, [sp, #160]
+; BE-I64-NEON-NEXT: mov r10, r1
+; BE-I64-NEON-NEXT: ldr r3, [sp, #164]
+; BE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT: mov r0, r6
+; BE-I64-NEON-NEXT: mov r1, r5
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #172
+; BE-I64-NEON-NEXT: mov r8, r1
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: mov r0, r11
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #220
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #216]
+; BE-I64-NEON-NEXT: mov r11, r1
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #236
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #232]
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #252
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #248]
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #268
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #264]
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #280]
+; BE-I64-NEON-NEXT: ldr r2, [sp, #288]
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r7
+; BE-I64-NEON-NEXT: ldr r7, [sp, #284]
+; BE-I64-NEON-NEXT: ldr r3, [sp, #292]
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r5
+; BE-I64-NEON-NEXT: ldr r5, [sp, #328]
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r6
+; BE-I64-NEON-NEXT: ldr r6, [sp, #300]
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r8
+; BE-I64-NEON-NEXT: ldr r8, [sp, #184]
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r11
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r10
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r9
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r1
+; BE-I64-NEON-NEXT: mov r1, r7
+; BE-I64-NEON-NEXT: vstr d14, [sp, #48] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d13, [sp, #40] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d12, [sp, #32] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d11, [sp, #24] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d10, [sp, #16] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d9, [sp, #8] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d8, [sp] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: mov r10, r1
+; BE-I64-NEON-NEXT: ldr r1, [sp, #296]
+; BE-I64-NEON-NEXT: ldr r2, [sp, #304]
+; BE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT: ldr r3, [sp, #308]
+; BE-I64-NEON-NEXT: mov r0, r1
+; BE-I64-NEON-NEXT: mov r1, r6
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #332
+; BE-I64-NEON-NEXT: mov r11, r1
+; BE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT: mov r0, r5
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #188
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: mov r0, r8
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #204
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #200]
+; BE-I64-NEON-NEXT: mov r8, r1
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #348
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #344]
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #364
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #360]
+; BE-I64-NEON-NEXT: mov r9, r1
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #316
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #312]
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: vldr d18, [sp, #48] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d17, d15
+; BE-I64-NEON-NEXT: vrev64.32 d16, d18
+; BE-I64-NEON-NEXT: vldr d18, [sp, #40] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d24[0], r0
+; BE-I64-NEON-NEXT: add r0, r4, #64
+; BE-I64-NEON-NEXT: vldr d20, [sp, #32] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d19, d18
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r11
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r7
+; BE-I64-NEON-NEXT: vrev64.32 d18, d20
+; BE-I64-NEON-NEXT: vldr d20, [sp, #24] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r10
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r6
+; BE-I64-NEON-NEXT: vmov.32 d24[1], r1
+; BE-I64-NEON-NEXT: vldr d22, [sp, #16] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d21, d20
+; BE-I64-NEON-NEXT: vrev64.32 d1, d9
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r9
+; BE-I64-NEON-NEXT: vrev64.32 d31, d10
+; BE-I64-NEON-NEXT: vrev64.32 d20, d22
+; BE-I64-NEON-NEXT: vldr d22, [sp, #8] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d0, d8
+; BE-I64-NEON-NEXT: vrev64.32 d29, d14
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r5
+; BE-I64-NEON-NEXT: vrev64.32 d30, d24
+; BE-I64-NEON-NEXT: vrev64.32 d27, d22
+; BE-I64-NEON-NEXT: vldr d22, [sp] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vst1.64 {d0, d1}, [r0:128]!
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r8
+; BE-I64-NEON-NEXT: vrev64.32 d28, d13
+; BE-I64-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEON-NEXT: vrev64.32 d26, d22
+; BE-I64-NEON-NEXT: vrev64.32 d23, d12
+; BE-I64-NEON-NEXT: vst1.64 {d28, d29}, [r0:128]!
+; BE-I64-NEON-NEXT: vrev64.32 d22, d11
+; BE-I64-NEON-NEXT: vst1.64 {d26, d27}, [r0:128]
+; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r4:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d22, d23}, [r4:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r4:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]
+; BE-I64-NEON-NEXT: add sp, sp, #56
+; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: add sp, sp, #4
+; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128> %x)
+ ret <16 x iXLen> %a
+}
+declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>)
+
+define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) {
+; LE-I32-LABEL: lrint_v32fp128:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEXT: .pad #4
+; LE-I32-NEXT: sub sp, sp, #4
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: .pad #80
+; LE-I32-NEXT: sub sp, sp, #80
+; LE-I32-NEXT: str r3, [sp, #16] @ 4-byte Spill
+; LE-I32-NEXT: add r3, sp, #336
+; LE-I32-NEXT: str r2, [sp, #12] @ 4-byte Spill
+; LE-I32-NEXT: mov r9, r0
+; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #244
+; LE-I32-NEXT: ldr r7, [sp, #240]
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: ldr r5, [sp, #288]
+; LE-I32-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEXT: mov r0, r7
+; LE-I32-NEXT: ldr r8, [sp, #352]
+; LE-I32-NEXT: ldr r11, [sp, #656]
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #292
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: mov r0, r5
+; LE-I32-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #272
+; LE-I32-NEXT: mov r10, r0
+; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: ldr r6, [sp, #256]
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: ldr r1, [sp, #260]
+; LE-I32-NEXT: ldr r2, [sp, #264]
+; LE-I32-NEXT: ldr r3, [sp, #268]
+; LE-I32-NEXT: mov r0, r6
+; LE-I32-NEXT: ldr r7, [sp, #660]
+; LE-I32-NEXT: vmov.32 d11[1], r10
+; LE-I32-NEXT: ldr r5, [sp, #664]
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: ldr r1, [sp, #356]
+; LE-I32-NEXT: ldr r2, [sp, #360]
+; LE-I32-NEXT: add lr, sp, #32
+; LE-I32-NEXT: ldr r3, [sp, #364]
+; LE-I32-NEXT: mov r0, r8
+; LE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: ldr r3, [sp, #668]
+; LE-I32-NEXT: add lr, sp, #48
+; LE-I32-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEXT: mov r0, r11
+; LE-I32-NEXT: mov r1, r7
+; LE-I32-NEXT: mov r2, r5
+; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #400
+; LE-I32-NEXT: mov r8, r0
+; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #592
+; LE-I32-NEXT: mov r6, r0
+; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: ldr r4, [sp, #416]
+; LE-I32-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEXT: ldr r1, [sp, #420]
+; LE-I32-NEXT: ldr r2, [sp, #424]
+; LE-I32-NEXT: vmov.32 d13[0], r6
+; LE-I32-NEXT: ldr r3, [sp, #428]
+; LE-I32-NEXT: mov r0, r4
+; LE-I32-NEXT: ldr r7, [sp, #224]
+; LE-I32-NEXT: ldr r10, [sp, #228]
+; LE-I32-NEXT: ldr r5, [sp, #232]
+; LE-I32-NEXT: ldr r11, [sp, #464]
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: ldr r3, [sp, #236]
+; LE-I32-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEXT: mov r0, r7
+; LE-I32-NEXT: mov r1, r10
+; LE-I32-NEXT: mov r2, r5
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #208
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: ldr r0, [sp, #672]
+; LE-I32-NEXT: ldr r1, [sp, #676]
+; LE-I32-NEXT: ldr r2, [sp, #680]
+; LE-I32-NEXT: vmov.32 d11[0], r8
+; LE-I32-NEXT: ldr r3, [sp, #684]
+; LE-I32-NEXT: vmov.32 d9[1], r4
+; LE-I32-NEXT: ldr r7, [sp, #612]
+; LE-I32-NEXT: ldr r6, [sp, #616]
+; LE-I32-NEXT: ldr r5, [sp, #468]
+; LE-I32-NEXT: ldr r4, [sp, #472]
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEXT: ldr r3, [sp, #620]
+; LE-I32-NEXT: ldr r0, [sp, #608]
+; LE-I32-NEXT: mov r1, r7
+; LE-I32-NEXT: mov r2, r6
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: ldr r3, [sp, #476]
+; LE-I32-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEXT: mov r0, r11
+; LE-I32-NEXT: mov r1, r5
+; LE-I32-NEXT: mov r2, r4
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #560
+; LE-I32-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #644
+; LE-I32-NEXT: ldr r7, [sp, #640]
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: add lr, sp, #64
+; LE-I32-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEXT: mov r0, r7
+; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #624
+; LE-I32-NEXT: mov r11, r0
+; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #196
+; LE-I32-NEXT: ldr r7, [sp, #192]
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEXT: mov r0, r7
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: mov r6, r0
+; LE-I32-NEXT: ldr r2, [sp, #184]
+; LE-I32-NEXT: ldr r3, [sp, #188]
+; LE-I32-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; LE-I32-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #324
+; LE-I32-NEXT: ldr r7, [sp, #320]
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEXT: mov r0, r7
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #304
+; LE-I32-NEXT: mov r7, r0
+; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add lr, sp, #48
+; LE-I32-NEXT: ldr r4, [sp, #368]
+; LE-I32-NEXT: ldr r1, [sp, #372]
+; LE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I32-NEXT: ldr r2, [sp, #376]
+; LE-I32-NEXT: ldr r3, [sp, #380]
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: mov r0, r4
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: ldr r5, [sp, #384]
+; LE-I32-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEXT: ldr r1, [sp, #388]
+; LE-I32-NEXT: ldr r2, [sp, #392]
+; LE-I32-NEXT: ldr r3, [sp, #396]
+; LE-I32-NEXT: mov r0, r5
+; LE-I32-NEXT: ldr r4, [sp, #432]
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEXT: ldr r1, [sp, #436]
+; LE-I32-NEXT: ldr r2, [sp, #440]
+; LE-I32-NEXT: add lr, sp, #16
+; LE-I32-NEXT: ldr r3, [sp, #444]
+; LE-I32-NEXT: mov r0, r4
+; LE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEXT: ldr r0, [sp, #576]
+; LE-I32-NEXT: ldr r1, [sp, #580]
+; LE-I32-NEXT: add lr, sp, #48
+; LE-I32-NEXT: vmov.32 d14[1], r7
+; LE-I32-NEXT: ldr r2, [sp, #584]
+; LE-I32-NEXT: ldr r3, [sp, #588]
+; LE-I32-NEXT: vmov.32 d10[1], r11
+; LE-I32-NEXT: ldr r8, [sp, #448]
+; LE-I32-NEXT: ldr r4, [sp, #544]
+; LE-I32-NEXT: ldr r10, [sp, #548]
+; LE-I32-NEXT: vmov.32 d8[1], r6
+; LE-I32-NEXT: ldr r7, [sp, #552]
+; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT: ldr r11, [sp, #512]
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add lr, sp, #64
+; LE-I32-NEXT: ldr r3, [sp, #556]
+; LE-I32-NEXT: mov r1, r10
+; LE-I32-NEXT: mov r2, r7
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: vmov.32 d16[1], r0
+; LE-I32-NEXT: mov r0, r4
+; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #528
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; LE-I32-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEXT: ldr r0, [sp, #480]
+; LE-I32-NEXT: ldr r2, [sp, #488]
+; LE-I32-NEXT: vmov.32 d13[0], r1
+; LE-I32-NEXT: ldr r1, [sp, #484]
+; LE-I32-NEXT: ldr r3, [sp, #492]
+; LE-I32-NEXT: vmov.32 d15[1], r4
+; LE-I32-NEXT: ldr r7, [sp, #452]
+; LE-I32-NEXT: ldr r5, [sp, #456]
+; LE-I32-NEXT: ldr r6, [sp, #516]
+; LE-I32-NEXT: ldr r4, [sp, #520]
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: ldr r3, [sp, #460]
+; LE-I32-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEXT: mov r0, r8
+; LE-I32-NEXT: mov r1, r7
+; LE-I32-NEXT: mov r2, r5
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: ldr r3, [sp, #524]
+; LE-I32-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEXT: mov r0, r11
+; LE-I32-NEXT: mov r1, r6
+; LE-I32-NEXT: mov r2, r4
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: add r3, sp, #496
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT: bl lrintl
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: add r0, r9, #64
+; LE-I32-NEXT: add lr, sp, #64
+; LE-I32-NEXT: vst1.32 {d12, d13}, [r0:128]!
+; LE-I32-NEXT: vmov.32 d14[1], r4
+; LE-I32-NEXT: vst1.32 {d14, d15}, [r0:128]!
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: add lr, sp, #32
+; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT: vst1.64 {d10, d11}, [r0:128]
+; LE-I32-NEXT: vst1.32 {d8, d9}, [r9:128]!
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: add lr, sp, #48
+; LE-I32-NEXT: vst1.32 {d16, d17}, [r9:128]!
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: add lr, sp, #16
+; LE-I32-NEXT: vst1.32 {d16, d17}, [r9:128]!
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: vst1.64 {d16, d17}, [r9:128]
+; LE-I32-NEXT: add sp, sp, #80
+; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: add sp, sp, #4
+; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I64-LABEL: lrint_v32fp128:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: .pad #4
+; LE-I64-NEXT: sub sp, sp, #4
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: .pad #192
+; LE-I64-NEXT: sub sp, sp, #192
+; LE-I64-NEXT: str r3, [sp, #60] @ 4-byte Spill
+; LE-I64-NEXT: add r3, sp, #688
+; LE-I64-NEXT: str r2, [sp, #56] @ 4-byte Spill
+; LE-I64-NEXT: mov r9, r0
+; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #560
+; LE-I64-NEXT: mov r4, r0
+; LE-I64-NEXT: str r1, [sp, #64] @ 4-byte Spill
+; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: vmov.32 d17[0], r0
+; LE-I64-NEXT: ldr r7, [sp, #544]
+; LE-I64-NEXT: ldr r6, [sp, #548]
+; LE-I64-NEXT: add lr, sp, #96
+; LE-I64-NEXT: ldr r2, [sp, #552]
+; LE-I64-NEXT: vmov.32 d17[1], r1
+; LE-I64-NEXT: ldr r3, [sp, #556]
+; LE-I64-NEXT: mov r0, r7
+; LE-I64-NEXT: mov r1, r6
+; LE-I64-NEXT: vorr q4, q8, q8
+; LE-I64-NEXT: ldr r5, [sp, #528]
+; LE-I64-NEXT: vmov.32 d17[0], r4
+; LE-I64-NEXT: ldr r10, [sp, #304]
+; LE-I64-NEXT: ldr r8, [sp, #368]
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #532
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: mov r11, r1
+; LE-I64-NEXT: add lr, sp, #144
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: mov r0, r5
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #308
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: add lr, sp, #176
+; LE-I64-NEXT: vmov.32 d17[0], r0
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: mov r0, r10
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #372
+; LE-I64-NEXT: mov r10, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: mov r0, r8
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #404
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #400]
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #596
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #592]
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #676
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #672]
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add lr, sp, #96
+; LE-I64-NEXT: vmov.32 d13[1], r4
+; LE-I64-NEXT: str r1, [sp, #52] @ 4-byte Spill
+; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #80
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #128
+; LE-I64-NEXT: vmov.32 d9[1], r7
+; LE-I64-NEXT: ldr r1, [sp, #628]
+; LE-I64-NEXT: ldr r2, [sp, #632]
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #112
+; LE-I64-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEXT: ldr r3, [sp, #636]
+; LE-I64-NEXT: ldr r7, [sp, #64] @ 4-byte Reload
+; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #160
+; LE-I64-NEXT: vmov.32 d11[1], r10
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #176
+; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #144
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d18[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #624]
+; LE-I64-NEXT: vmov.32 d16[1], r11
+; LE-I64-NEXT: vmov.32 d9[1], r5
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #96
+; LE-I64-NEXT: vmov.32 d19[1], r7
+; LE-I64-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #512
+; LE-I64-NEXT: str r0, [sp, #48] @ 4-byte Spill
+; LE-I64-NEXT: str r1, [sp, #64] @ 4-byte Spill
+; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #768
+; LE-I64-NEXT: mov r11, r0
+; LE-I64-NEXT: str r1, [sp, #28] @ 4-byte Spill
+; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: ldr r6, [sp, #784]
+; LE-I64-NEXT: add r3, sp, #788
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: mov r0, r6
+; LE-I64-NEXT: ldr r5, [sp, #736]
+; LE-I64-NEXT: ldr r7, [sp, #752]
+; LE-I64-NEXT: ldr r4, [sp, #720]
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #740
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: mov r0, r5
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #756
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: mov r0, r7
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #724
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: mov r0, r4
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: vmov.32 d13[1], r7
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: ldr r2, [sp, #296]
+; LE-I64-NEXT: vmov.32 d12[1], r5
+; LE-I64-NEXT: ldr r3, [sp, #300]
+; LE-I64-NEXT: ldr r4, [sp, #576]
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; LE-I64-NEXT: ldr r10, [sp, #384]
+; LE-I64-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEXT: ldr r6, [sp, #352]
+; LE-I64-NEXT: vmov.32 d14[1], r8
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #32
+; LE-I64-NEXT: vmov.32 d11[1], r1
+; LE-I64-NEXT: ldr r1, [sp, #60] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d8[0], r11
+; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #176
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add lr, sp, #160
+; LE-I64-NEXT: add r3, sp, #356
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: vmov.32 d16[0], r0
+; LE-I64-NEXT: mov r0, r6
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add lr, sp, #112
+; LE-I64-NEXT: add r3, sp, #388
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: mov r0, r10
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add lr, sp, #128
+; LE-I64-NEXT: add r3, sp, #580
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: mov r0, r4
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add lr, sp, #80
+; LE-I64-NEXT: add r3, sp, #708
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #704]
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: vmov.32 d8[1], r4
+; LE-I64-NEXT: add lr, sp, #80
+; LE-I64-NEXT: ldr r2, [sp, #52] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d12[1], r6
+; LE-I64-NEXT: ldr r6, [sp, #644]
+; LE-I64-NEXT: ldr r3, [sp, #652]
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #128
+; LE-I64-NEXT: vmov.32 d14[1], r7
+; LE-I64-NEXT: ldr r4, [sp, #480]
+; LE-I64-NEXT: ldr r7, [sp, #656]
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #112
+; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #160
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
+; LE-I64-NEXT: ldr r10, [sp, #496]
+; LE-I64-NEXT: vmov.32 d16[1], r5
+; LE-I64-NEXT: add r5, r9, #192
+; LE-I64-NEXT: ldr r8, [sp, #608]
+; LE-I64-NEXT: vmov.32 d10[1], r1
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #176
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d16[1], r0
+; LE-I64-NEXT: ldr r0, [sp, #640]
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #96
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: vmov.32 d16[1], r2
+; LE-I64-NEXT: ldr r2, [sp, #648]
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; LE-I64-NEXT: vst1.64 {d10, d11}, [r5:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; LE-I64-NEXT: ldr r1, [sp, #48] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d9[0], r1
+; LE-I64-NEXT: mov r1, r6
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #660
+; LE-I64-NEXT: mov r11, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: mov r0, r7
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #484
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: mov r0, r4
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #500
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: mov r0, r10
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #612
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: mov r0, r8
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #64] @ 4-byte Reload
+; LE-I64-NEXT: add lr, sp, #96
+; LE-I64-NEXT: add r8, r9, #128
+; LE-I64-NEXT: vmov.32 d13[1], r7
+; LE-I64-NEXT: ldr r2, [sp, #344]
+; LE-I64-NEXT: ldr r3, [sp, #348]
+; LE-I64-NEXT: vmov.32 d12[1], r11
+; LE-I64-NEXT: ldr r7, [sp, #452]
+; LE-I64-NEXT: ldr r10, [sp, #416]
+; LE-I64-NEXT: vmov.32 d9[1], r0
+; LE-I64-NEXT: ldr r0, [sp, #336]
+; LE-I64-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #64
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #32
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #144
+; LE-I64-NEXT: vmov.32 d11[1], r4
+; LE-I64-NEXT: ldr r4, [sp, #340]
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]
+; LE-I64-NEXT: mov r1, r4
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #80
+; LE-I64-NEXT: vmov.32 d10[1], r6
+; LE-I64-NEXT: ldr r6, [sp, #448]
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r8:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r8:128]!
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: ldr r2, [sp, #456]
+; LE-I64-NEXT: mov r11, r1
+; LE-I64-NEXT: ldr r3, [sp, #460]
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: mov r0, r6
+; LE-I64-NEXT: mov r1, r7
+; LE-I64-NEXT: ldr r5, [sp, #432]
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #468
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #464]
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #420
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: mov r0, r10
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #436
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: mov r0, r5
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add r3, sp, #324
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #320]
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEXT: bl lrintl
+; LE-I64-NEXT: add lr, sp, #64
+; LE-I64-NEXT: vmov.32 d9[1], r5
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #96
+; LE-I64-NEXT: vmov.32 d13[1], r7
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r8:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #176
+; LE-I64-NEXT: vmov.32 d8[1], r4
+; LE-I64-NEXT: vmov.32 d12[1], r6
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: add r0, r9, #64
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r8:128]
+; LE-I64-NEXT: vst1.64 {d8, d9}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #160
+; LE-I64-NEXT: vmov.32 d15[1], r11
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #112
+; LE-I64-NEXT: vmov.32 d14[1], r1
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r9:128]!
+; LE-I64-NEXT: vst1.64 {d14, d15}, [r9:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #128
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r9:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r9:128]
+; LE-I64-NEXT: add sp, sp, #192
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: add sp, sp, #4
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v32fp128:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEON-NEXT: .pad #4
+; LE-I32-NEON-NEXT: sub sp, sp, #4
+; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: .pad #80
+; LE-I32-NEON-NEXT: sub sp, sp, #80
+; LE-I32-NEON-NEXT: str r3, [sp, #16] @ 4-byte Spill
+; LE-I32-NEON-NEXT: add r3, sp, #336
+; LE-I32-NEON-NEXT: str r2, [sp, #12] @ 4-byte Spill
+; LE-I32-NEON-NEXT: mov r9, r0
+; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #244
+; LE-I32-NEON-NEXT: ldr r7, [sp, #240]
+; LE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT: ldr r5, [sp, #288]
+; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT: mov r0, r7
+; LE-I32-NEON-NEXT: ldr r8, [sp, #352]
+; LE-I32-NEON-NEXT: ldr r11, [sp, #656]
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #292
+; LE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT: mov r0, r5
+; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #272
+; LE-I32-NEON-NEXT: mov r10, r0
+; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: ldr r6, [sp, #256]
+; LE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT: ldr r1, [sp, #260]
+; LE-I32-NEON-NEXT: ldr r2, [sp, #264]
+; LE-I32-NEON-NEXT: ldr r3, [sp, #268]
+; LE-I32-NEON-NEXT: mov r0, r6
+; LE-I32-NEON-NEXT: ldr r7, [sp, #660]
+; LE-I32-NEON-NEXT: vmov.32 d11[1], r10
+; LE-I32-NEON-NEXT: ldr r5, [sp, #664]
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT: ldr r1, [sp, #356]
+; LE-I32-NEON-NEXT: ldr r2, [sp, #360]
+; LE-I32-NEON-NEXT: add lr, sp, #32
+; LE-I32-NEON-NEXT: ldr r3, [sp, #364]
+; LE-I32-NEON-NEXT: mov r0, r8
+; LE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: ldr r3, [sp, #668]
+; LE-I32-NEON-NEXT: add lr, sp, #48
+; LE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT: mov r0, r11
+; LE-I32-NEON-NEXT: mov r1, r7
+; LE-I32-NEON-NEXT: mov r2, r5
+; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #400
+; LE-I32-NEON-NEXT: mov r8, r0
+; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #592
+; LE-I32-NEON-NEXT: mov r6, r0
+; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: ldr r4, [sp, #416]
+; LE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT: ldr r1, [sp, #420]
+; LE-I32-NEON-NEXT: ldr r2, [sp, #424]
+; LE-I32-NEON-NEXT: vmov.32 d13[0], r6
+; LE-I32-NEON-NEXT: ldr r3, [sp, #428]
+; LE-I32-NEON-NEXT: mov r0, r4
+; LE-I32-NEON-NEXT: ldr r7, [sp, #224]
+; LE-I32-NEON-NEXT: ldr r10, [sp, #228]
+; LE-I32-NEON-NEXT: ldr r5, [sp, #232]
+; LE-I32-NEON-NEXT: ldr r11, [sp, #464]
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: ldr r3, [sp, #236]
+; LE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT: mov r0, r7
+; LE-I32-NEON-NEXT: mov r1, r10
+; LE-I32-NEON-NEXT: mov r2, r5
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #208
+; LE-I32-NEON-NEXT: mov r4, r0
+; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT: ldr r0, [sp, #672]
+; LE-I32-NEON-NEXT: ldr r1, [sp, #676]
+; LE-I32-NEON-NEXT: ldr r2, [sp, #680]
+; LE-I32-NEON-NEXT: vmov.32 d11[0], r8
+; LE-I32-NEON-NEXT: ldr r3, [sp, #684]
+; LE-I32-NEON-NEXT: vmov.32 d9[1], r4
+; LE-I32-NEON-NEXT: ldr r7, [sp, #612]
+; LE-I32-NEON-NEXT: ldr r6, [sp, #616]
+; LE-I32-NEON-NEXT: ldr r5, [sp, #468]
+; LE-I32-NEON-NEXT: ldr r4, [sp, #472]
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT: ldr r3, [sp, #620]
+; LE-I32-NEON-NEXT: ldr r0, [sp, #608]
+; LE-I32-NEON-NEXT: mov r1, r7
+; LE-I32-NEON-NEXT: mov r2, r6
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: ldr r3, [sp, #476]
+; LE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT: mov r0, r11
+; LE-I32-NEON-NEXT: mov r1, r5
+; LE-I32-NEON-NEXT: mov r2, r4
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #560
+; LE-I32-NEON-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #644
+; LE-I32-NEON-NEXT: ldr r7, [sp, #640]
+; LE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT: add lr, sp, #64
+; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT: mov r0, r7
+; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #624
+; LE-I32-NEON-NEXT: mov r11, r0
+; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #196
+; LE-I32-NEON-NEXT: ldr r7, [sp, #192]
+; LE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT: mov r0, r7
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: mov r6, r0
+; LE-I32-NEON-NEXT: ldr r2, [sp, #184]
+; LE-I32-NEON-NEXT: ldr r3, [sp, #188]
+; LE-I32-NEON-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; LE-I32-NEON-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #324
+; LE-I32-NEON-NEXT: ldr r7, [sp, #320]
+; LE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT: mov r0, r7
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #304
+; LE-I32-NEON-NEXT: mov r7, r0
+; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add lr, sp, #48
+; LE-I32-NEON-NEXT: ldr r4, [sp, #368]
+; LE-I32-NEON-NEXT: ldr r1, [sp, #372]
+; LE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I32-NEON-NEXT: ldr r2, [sp, #376]
+; LE-I32-NEON-NEXT: ldr r3, [sp, #380]
+; LE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT: mov r0, r4
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: ldr r5, [sp, #384]
+; LE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEON-NEXT: ldr r1, [sp, #388]
+; LE-I32-NEON-NEXT: ldr r2, [sp, #392]
+; LE-I32-NEON-NEXT: ldr r3, [sp, #396]
+; LE-I32-NEON-NEXT: mov r0, r5
+; LE-I32-NEON-NEXT: ldr r4, [sp, #432]
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT: ldr r1, [sp, #436]
+; LE-I32-NEON-NEXT: ldr r2, [sp, #440]
+; LE-I32-NEON-NEXT: add lr, sp, #16
+; LE-I32-NEON-NEXT: ldr r3, [sp, #444]
+; LE-I32-NEON-NEXT: mov r0, r4
+; LE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEON-NEXT: ldr r0, [sp, #576]
+; LE-I32-NEON-NEXT: ldr r1, [sp, #580]
+; LE-I32-NEON-NEXT: add lr, sp, #48
+; LE-I32-NEON-NEXT: vmov.32 d14[1], r7
+; LE-I32-NEON-NEXT: ldr r2, [sp, #584]
+; LE-I32-NEON-NEXT: ldr r3, [sp, #588]
+; LE-I32-NEON-NEXT: vmov.32 d10[1], r11
+; LE-I32-NEON-NEXT: ldr r8, [sp, #448]
+; LE-I32-NEON-NEXT: ldr r4, [sp, #544]
+; LE-I32-NEON-NEXT: ldr r10, [sp, #548]
+; LE-I32-NEON-NEXT: vmov.32 d8[1], r6
+; LE-I32-NEON-NEXT: ldr r7, [sp, #552]
+; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEON-NEXT: ldr r11, [sp, #512]
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add lr, sp, #64
+; LE-I32-NEON-NEXT: ldr r3, [sp, #556]
+; LE-I32-NEON-NEXT: mov r1, r10
+; LE-I32-NEON-NEXT: mov r2, r7
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vmov.32 d16[1], r0
+; LE-I32-NEON-NEXT: mov r0, r4
+; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #528
+; LE-I32-NEON-NEXT: mov r4, r0
+; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; LE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT: ldr r0, [sp, #480]
+; LE-I32-NEON-NEXT: ldr r2, [sp, #488]
+; LE-I32-NEON-NEXT: vmov.32 d13[0], r1
+; LE-I32-NEON-NEXT: ldr r1, [sp, #484]
+; LE-I32-NEON-NEXT: ldr r3, [sp, #492]
+; LE-I32-NEON-NEXT: vmov.32 d15[1], r4
+; LE-I32-NEON-NEXT: ldr r7, [sp, #452]
+; LE-I32-NEON-NEXT: ldr r5, [sp, #456]
+; LE-I32-NEON-NEXT: ldr r6, [sp, #516]
+; LE-I32-NEON-NEXT: ldr r4, [sp, #520]
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: ldr r3, [sp, #460]
+; LE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT: mov r0, r8
+; LE-I32-NEON-NEXT: mov r1, r7
+; LE-I32-NEON-NEXT: mov r2, r5
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: ldr r3, [sp, #524]
+; LE-I32-NEON-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT: mov r0, r11
+; LE-I32-NEON-NEXT: mov r1, r6
+; LE-I32-NEON-NEXT: mov r2, r4
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: add r3, sp, #496
+; LE-I32-NEON-NEXT: mov r4, r0
+; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT: bl lrintl
+; LE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT: add r0, r9, #64
+; LE-I32-NEON-NEXT: add lr, sp, #64
+; LE-I32-NEON-NEXT: vst1.32 {d12, d13}, [r0:128]!
+; LE-I32-NEON-NEXT: vmov.32 d14[1], r4
+; LE-I32-NEON-NEXT: vst1.32 {d14, d15}, [r0:128]!
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: add lr, sp, #32
+; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]
+; LE-I32-NEON-NEXT: vst1.32 {d8, d9}, [r9:128]!
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: add lr, sp, #48
+; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r9:128]!
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: add lr, sp, #16
+; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r9:128]!
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r9:128]
+; LE-I32-NEON-NEXT: add sp, sp, #80
+; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: add sp, sp, #4
+; LE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v32fp128:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: .pad #4
+; LE-I64-NEON-NEXT: sub sp, sp, #4
+; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: .pad #192
+; LE-I64-NEON-NEXT: sub sp, sp, #192
+; LE-I64-NEON-NEXT: str r3, [sp, #60] @ 4-byte Spill
+; LE-I64-NEON-NEXT: add r3, sp, #688
+; LE-I64-NEON-NEXT: str r2, [sp, #56] @ 4-byte Spill
+; LE-I64-NEON-NEXT: mov r9, r0
+; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #560
+; LE-I64-NEON-NEXT: mov r4, r0
+; LE-I64-NEON-NEXT: str r1, [sp, #64] @ 4-byte Spill
+; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: vmov.32 d17[0], r0
+; LE-I64-NEON-NEXT: ldr r7, [sp, #544]
+; LE-I64-NEON-NEXT: ldr r6, [sp, #548]
+; LE-I64-NEON-NEXT: add lr, sp, #96
+; LE-I64-NEON-NEXT: ldr r2, [sp, #552]
+; LE-I64-NEON-NEXT: vmov.32 d17[1], r1
+; LE-I64-NEON-NEXT: ldr r3, [sp, #556]
+; LE-I64-NEON-NEXT: mov r0, r7
+; LE-I64-NEON-NEXT: mov r1, r6
+; LE-I64-NEON-NEXT: vorr q4, q8, q8
+; LE-I64-NEON-NEXT: ldr r5, [sp, #528]
+; LE-I64-NEON-NEXT: vmov.32 d17[0], r4
+; LE-I64-NEON-NEXT: ldr r10, [sp, #304]
+; LE-I64-NEON-NEXT: ldr r8, [sp, #368]
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #532
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: mov r11, r1
+; LE-I64-NEON-NEXT: add lr, sp, #144
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: mov r0, r5
+; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #308
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: add lr, sp, #176
+; LE-I64-NEON-NEXT: vmov.32 d17[0], r0
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: mov r0, r10
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #372
+; LE-I64-NEON-NEXT: mov r10, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: mov r0, r8
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #404
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #400]
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #596
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #592]
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #676
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #672]
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add lr, sp, #96
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r4
+; LE-I64-NEON-NEXT: str r1, [sp, #52] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #80
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #128
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r7
+; LE-I64-NEON-NEXT: ldr r1, [sp, #628]
+; LE-I64-NEON-NEXT: ldr r2, [sp, #632]
+; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #112
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEON-NEXT: ldr r3, [sp, #636]
+; LE-I64-NEON-NEXT: ldr r7, [sp, #64] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #160
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r10
+; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #176
+; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #144
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d18[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #624]
+; LE-I64-NEON-NEXT: vmov.32 d16[1], r11
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r5
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #96
+; LE-I64-NEON-NEXT: vmov.32 d19[1], r7
+; LE-I64-NEON-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #512
+; LE-I64-NEON-NEXT: str r0, [sp, #48] @ 4-byte Spill
+; LE-I64-NEON-NEXT: str r1, [sp, #64] @ 4-byte Spill
+; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #768
+; LE-I64-NEON-NEXT: mov r11, r0
+; LE-I64-NEON-NEXT: str r1, [sp, #28] @ 4-byte Spill
+; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: ldr r6, [sp, #784]
+; LE-I64-NEON-NEXT: add r3, sp, #788
+; LE-I64-NEON-NEXT: mov r8, r1
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: mov r0, r6
+; LE-I64-NEON-NEXT: ldr r5, [sp, #736]
+; LE-I64-NEON-NEXT: ldr r7, [sp, #752]
+; LE-I64-NEON-NEXT: ldr r4, [sp, #720]
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #740
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: mov r0, r5
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #756
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: mov r0, r7
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #724
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: mov r0, r4
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r7
+; LE-I64-NEON-NEXT: add lr, sp, #8
+; LE-I64-NEON-NEXT: ldr r2, [sp, #296]
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r5
+; LE-I64-NEON-NEXT: ldr r3, [sp, #300]
+; LE-I64-NEON-NEXT: ldr r4, [sp, #576]
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; LE-I64-NEON-NEXT: ldr r10, [sp, #384]
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEON-NEXT: ldr r6, [sp, #352]
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r8
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #32
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r1
+; LE-I64-NEON-NEXT: ldr r1, [sp, #60] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r11
+; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #176
+; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add lr, sp, #160
+; LE-I64-NEON-NEXT: add r3, sp, #356
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; LE-I64-NEON-NEXT: mov r0, r6
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add lr, sp, #112
+; LE-I64-NEON-NEXT: add r3, sp, #388
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: mov r0, r10
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add lr, sp, #128
+; LE-I64-NEON-NEXT: add r3, sp, #580
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: mov r0, r4
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add lr, sp, #80
+; LE-I64-NEON-NEXT: add r3, sp, #708
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #704]
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r4
+; LE-I64-NEON-NEXT: add lr, sp, #80
+; LE-I64-NEON-NEXT: ldr r2, [sp, #52] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r6
+; LE-I64-NEON-NEXT: ldr r6, [sp, #644]
+; LE-I64-NEON-NEXT: ldr r3, [sp, #652]
+; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #128
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r7
+; LE-I64-NEON-NEXT: ldr r4, [sp, #480]
+; LE-I64-NEON-NEXT: ldr r7, [sp, #656]
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #112
+; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #160
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
+; LE-I64-NEON-NEXT: ldr r10, [sp, #496]
+; LE-I64-NEON-NEXT: vmov.32 d16[1], r5
+; LE-I64-NEON-NEXT: add r5, r9, #192
+; LE-I64-NEON-NEXT: ldr r8, [sp, #608]
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r1
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #176
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d16[1], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #640]
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #96
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #8
+; LE-I64-NEON-NEXT: vmov.32 d16[1], r2
+; LE-I64-NEON-NEXT: ldr r2, [sp, #648]
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r5:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]!
+; LE-I64-NEON-NEXT: ldr r1, [sp, #48] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r1
+; LE-I64-NEON-NEXT: mov r1, r6
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #660
+; LE-I64-NEON-NEXT: mov r11, r1
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: mov r0, r7
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #484
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: mov r0, r4
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #500
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: mov r0, r10
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #612
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: mov r0, r8
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #64] @ 4-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #96
+; LE-I64-NEON-NEXT: add r8, r9, #128
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r7
+; LE-I64-NEON-NEXT: ldr r2, [sp, #344]
+; LE-I64-NEON-NEXT: ldr r3, [sp, #348]
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r11
+; LE-I64-NEON-NEXT: ldr r7, [sp, #452]
+; LE-I64-NEON-NEXT: ldr r10, [sp, #416]
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #336]
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #64
+; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #32
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #144
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r4
+; LE-I64-NEON-NEXT: ldr r4, [sp, #340]
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]
+; LE-I64-NEON-NEXT: mov r1, r4
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #80
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r6
+; LE-I64-NEON-NEXT: ldr r6, [sp, #448]
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]!
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: ldr r2, [sp, #456]
+; LE-I64-NEON-NEXT: mov r11, r1
+; LE-I64-NEON-NEXT: ldr r3, [sp, #460]
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: mov r0, r6
+; LE-I64-NEON-NEXT: mov r1, r7
+; LE-I64-NEON-NEXT: ldr r5, [sp, #432]
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #468
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #464]
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #420
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: mov r0, r10
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #436
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: mov r0, r5
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add r3, sp, #324
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #320]
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT: bl lrintl
+; LE-I64-NEON-NEXT: add lr, sp, #64
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r5
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #96
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r7
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #176
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r4
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r6
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: add r0, r9, #64
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]
+; LE-I64-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #160
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r11
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #112
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r1
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r9:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r9:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #128
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r9:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r9:128]
+; LE-I64-NEON-NEXT: add sp, sp, #192
+; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: add sp, sp, #4
+; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v32fp128:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEXT: .pad #4
+; BE-I32-NEXT: sub sp, sp, #4
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: .pad #104
+; BE-I32-NEXT: sub sp, sp, #104
+; BE-I32-NEXT: mov r4, r3
+; BE-I32-NEXT: add r3, sp, #248
+; BE-I32-NEXT: mov r8, r2
+; BE-I32-NEXT: mov r11, r0
+; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #616
+; BE-I32-NEXT: mov r9, r0
+; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #680
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: ldr r7, [sp, #232]
+; BE-I32-NEXT: add lr, sp, #72
+; BE-I32-NEXT: ldr r1, [sp, #236]
+; BE-I32-NEXT: vmov.32 d17[0], r0
+; BE-I32-NEXT: ldr r2, [sp, #240]
+; BE-I32-NEXT: ldr r3, [sp, #244]
+; BE-I32-NEXT: mov r0, r7
+; BE-I32-NEXT: ldr r10, [sp, #376]
+; BE-I32-NEXT: vmov.32 d11[0], r5
+; BE-I32-NEXT: ldr r6, [sp, #296]
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #300
+; BE-I32-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEXT: mov r0, r6
+; BE-I32-NEXT: ldm r3, {r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #380
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: mov r0, r10
+; BE-I32-NEXT: ldm r3, {r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #360
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: vmov.32 d17[0], r0
+; BE-I32-NEXT: ldr r6, [sp, #312]
+; BE-I32-NEXT: ldr r1, [sp, #316]
+; BE-I32-NEXT: ldr r2, [sp, #320]
+; BE-I32-NEXT: ldr r3, [sp, #324]
+; BE-I32-NEXT: vmov.32 d17[1], r5
+; BE-I32-NEXT: mov r0, r6
+; BE-I32-NEXT: ldr r7, [sp, #572]
+; BE-I32-NEXT: vorr q4, q8, q8
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: ldr r6, [sp, #632]
+; BE-I32-NEXT: add lr, sp, #88
+; BE-I32-NEXT: ldr r1, [sp, #636]
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: ldr r2, [sp, #640]
+; BE-I32-NEXT: ldr r3, [sp, #644]
+; BE-I32-NEXT: mov r0, r6
+; BE-I32-NEXT: ldr r5, [sp, #576]
+; BE-I32-NEXT: vmov.32 d15[1], r9
+; BE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: ldr r3, [sp, #580]
+; BE-I32-NEXT: ldr r0, [sp, #568]
+; BE-I32-NEXT: mov r1, r7
+; BE-I32-NEXT: mov r2, r5
+; BE-I32-NEXT: vorr q6, q5, q5
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #552
+; BE-I32-NEXT: mov r9, r0
+; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #520
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: ldr r6, [sp, #584]
+; BE-I32-NEXT: add lr, sp, #8
+; BE-I32-NEXT: ldr r1, [sp, #588]
+; BE-I32-NEXT: vmov.32 d16[0], r0
+; BE-I32-NEXT: ldr r2, [sp, #592]
+; BE-I32-NEXT: ldr r3, [sp, #596]
+; BE-I32-NEXT: mov r0, r6
+; BE-I32-NEXT: vmov.32 d17[0], r5
+; BE-I32-NEXT: ldr r7, [sp, #216]
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #220
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: mov r0, r7
+; BE-I32-NEXT: ldm r3, {r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: ldr r2, [sp, #208]
+; BE-I32-NEXT: mov r7, r0
+; BE-I32-NEXT: ldr r3, [sp, #212]
+; BE-I32-NEXT: mov r0, r8
+; BE-I32-NEXT: mov r1, r4
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #456
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: ldr r6, [sp, #328]
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: ldr r1, [sp, #332]
+; BE-I32-NEXT: ldr r2, [sp, #336]
+; BE-I32-NEXT: vmov.32 d14[0], r5
+; BE-I32-NEXT: ldr r3, [sp, #340]
+; BE-I32-NEXT: mov r0, r6
+; BE-I32-NEXT: ldr r10, [sp, #504]
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: ldr r6, [sp, #344]
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: ldr r1, [sp, #348]
+; BE-I32-NEXT: ldr r2, [sp, #352]
+; BE-I32-NEXT: ldr r3, [sp, #356]
+; BE-I32-NEXT: mov r0, r6
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: ldr r6, [sp, #600]
+; BE-I32-NEXT: add lr, sp, #56
+; BE-I32-NEXT: ldr r1, [sp, #604]
+; BE-I32-NEXT: vmov.32 d14[1], r7
+; BE-I32-NEXT: ldr r2, [sp, #608]
+; BE-I32-NEXT: ldr r3, [sp, #612]
+; BE-I32-NEXT: mov r0, r6
+; BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #40
+; BE-I32-NEXT: ldr r5, [sp, #508]
+; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEXT: add lr, sp, #24
+; BE-I32-NEXT: ldr r7, [sp, #536]
+; BE-I32-NEXT: ldr r1, [sp, #540]
+; BE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #8
+; BE-I32-NEXT: mov r0, r7
+; BE-I32-NEXT: ldr r2, [sp, #544]
+; BE-I32-NEXT: ldr r3, [sp, #548]
+; BE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEXT: ldr r6, [sp, #512]
+; BE-I32-NEXT: vmov.32 d13[1], r9
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: ldr r3, [sp, #516]
+; BE-I32-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEXT: mov r0, r10
+; BE-I32-NEXT: mov r1, r5
+; BE-I32-NEXT: mov r2, r6
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #488
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #424
+; BE-I32-NEXT: mov r7, r0
+; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: ldr r6, [sp, #264]
+; BE-I32-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEXT: ldr r1, [sp, #268]
+; BE-I32-NEXT: ldr r2, [sp, #272]
+; BE-I32-NEXT: vmov.32 d11[0], r7
+; BE-I32-NEXT: ldr r3, [sp, #276]
+; BE-I32-NEXT: mov r0, r6
+; BE-I32-NEXT: ldr r8, [sp, #696]
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add lr, sp, #88
+; BE-I32-NEXT: ldr r4, [sp, #472]
+; BE-I32-NEXT: ldr r1, [sp, #476]
+; BE-I32-NEXT: vmov.32 d11[1], r5
+; BE-I32-NEXT: ldr r2, [sp, #480]
+; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT: ldr r3, [sp, #484]
+; BE-I32-NEXT: vmov.32 d16[0], r0
+; BE-I32-NEXT: mov r0, r4
+; BE-I32-NEXT: ldr r6, [sp, #700]
+; BE-I32-NEXT: ldr r7, [sp, #704]
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: ldr r3, [sp, #708]
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: mov r0, r8
+; BE-I32-NEXT: mov r1, r6
+; BE-I32-NEXT: mov r2, r7
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #648
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add lr, sp, #72
+; BE-I32-NEXT: ldr r5, [sp, #664]
+; BE-I32-NEXT: ldr r1, [sp, #668]
+; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEXT: ldr r2, [sp, #672]
+; BE-I32-NEXT: ldr r3, [sp, #676]
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: mov r0, r5
+; BE-I32-NEXT: ldr r6, [sp, #444]
+; BE-I32-NEXT: vmov.32 d9[1], r4
+; BE-I32-NEXT: ldr r7, [sp, #448]
+; BE-I32-NEXT: ldr r8, [sp, #412]
+; BE-I32-NEXT: ldr r4, [sp, #416]
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: ldr r3, [sp, #452]
+; BE-I32-NEXT: ldr r0, [sp, #440]
+; BE-I32-NEXT: mov r1, r6
+; BE-I32-NEXT: mov r2, r7
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEXT: ldr r3, [sp, #420]
+; BE-I32-NEXT: ldr r0, [sp, #408]
+; BE-I32-NEXT: mov r1, r8
+; BE-I32-NEXT: mov r2, r4
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #392
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add r3, sp, #284
+; BE-I32-NEXT: ldr r7, [sp, #280]
+; BE-I32-NEXT: vmov.32 d14[0], r0
+; BE-I32-NEXT: ldm r3, {r1, r2, r3}
+; BE-I32-NEXT: mov r0, r7
+; BE-I32-NEXT: vmov.32 d14[1], r4
+; BE-I32-NEXT: bl lrintl
+; BE-I32-NEXT: add lr, sp, #88
+; BE-I32-NEXT: vrev64.32 q9, q4
+; BE-I32-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I32-NEXT: add lr, sp, #24
+; BE-I32-NEXT: vrev64.32 q8, q7
+; BE-I32-NEXT: vmov.32 d20[1], r0
+; BE-I32-NEXT: add r0, r11, #64
+; BE-I32-NEXT: vst1.32 {d10, d11}, [r0:128]!
+; BE-I32-NEXT: vst1.32 {d12, d13}, [r0:128]!
+; BE-I32-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-I32-NEXT: add lr, sp, #40
+; BE-I32-NEXT: vst1.32 {d22, d23}, [r0:128]!
+; BE-I32-NEXT: vst1.64 {d18, d19}, [r0:128]
+; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT: add lr, sp, #56
+; BE-I32-NEXT: vst1.32 {d18, d19}, [r11:128]!
+; BE-I32-NEXT: vst1.32 {d20, d21}, [r11:128]!
+; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT: vst1.32 {d18, d19}, [r11:128]!
+; BE-I32-NEXT: vst1.64 {d16, d17}, [r11:128]
+; BE-I32-NEXT: add sp, sp, #104
+; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: add sp, sp, #4
+; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I64-LABEL: lrint_v32fp128:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: .pad #4
+; BE-I64-NEXT: sub sp, sp, #4
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: .pad #152
+; BE-I64-NEXT: sub sp, sp, #152
+; BE-I64-NEXT: str r3, [sp, #120] @ 4-byte Spill
+; BE-I64-NEXT: add r3, sp, #712
+; BE-I64-NEXT: str r2, [sp, #112] @ 4-byte Spill
+; BE-I64-NEXT: mov r9, r0
+; BE-I64-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: ldr r7, [sp, #648]
+; BE-I64-NEXT: add r3, sp, #652
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: mov r0, r7
+; BE-I64-NEXT: ldr r6, [sp, #520]
+; BE-I64-NEXT: ldr r8, [sp, #632]
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #524
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: mov r0, r6
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #636
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEXT: mov r0, r8
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #488]
+; BE-I64-NEXT: vmov.32 d8[1], r4
+; BE-I64-NEXT: ldr r1, [sp, #492]
+; BE-I64-NEXT: ldr r2, [sp, #496]
+; BE-I64-NEXT: vmov.32 d10[1], r7
+; BE-I64-NEXT: ldr r3, [sp, #500]
+; BE-I64-NEXT: vmov.32 d9[1], r5
+; BE-I64-NEXT: vstr d8, [sp, #144] @ 8-byte Spill
+; BE-I64-NEXT: vstr d10, [sp, #136] @ 8-byte Spill
+; BE-I64-NEXT: vstr d9, [sp, #128] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #680
+; BE-I64-NEXT: str r0, [sp, #104] @ 4-byte Spill
+; BE-I64-NEXT: str r1, [sp, #88] @ 4-byte Spill
+; BE-I64-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #728]
+; BE-I64-NEXT: ldr r2, [sp, #736]
+; BE-I64-NEXT: vmov.32 d11[1], r6
+; BE-I64-NEXT: ldr r6, [sp, #732]
+; BE-I64-NEXT: ldr r3, [sp, #740]
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: ldr r5, [sp, #504]
+; BE-I64-NEXT: mov r1, r6
+; BE-I64-NEXT: ldr r7, [sp, #744]
+; BE-I64-NEXT: ldr r4, [sp, #748]
+; BE-I64-NEXT: vstr d11, [sp, #24] @ 8-byte Spill
+; BE-I64-NEXT: vstr d16, [sp, #8] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: ldr r2, [sp, #752]
+; BE-I64-NEXT: mov r11, r1
+; BE-I64-NEXT: ldr r3, [sp, #756]
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: mov r0, r7
+; BE-I64-NEXT: mov r1, r4
+; BE-I64-NEXT: ldr r10, [sp, #552]
+; BE-I64-NEXT: ldr r6, [sp, #664]
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #508
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEXT: mov r0, r5
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #540
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #536]
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #556
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: mov r0, r10
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #668
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: mov r0, r6
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #700
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #696]
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #104] @ 4-byte Reload
+; BE-I64-NEXT: ldr r2, [sp, #256]
+; BE-I64-NEXT: vmov.32 d13[1], r11
+; BE-I64-NEXT: ldr r3, [sp, #260]
+; BE-I64-NEXT: vmov.32 d14[1], r6
+; BE-I64-NEXT: ldr r6, [sp, #264]
+; BE-I64-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEXT: ldr r4, [sp, #344]
+; BE-I64-NEXT: vmov.32 d12[1], r5
+; BE-I64-NEXT: ldr r5, [sp, #312]
+; BE-I64-NEXT: vmov.32 d8[1], r8
+; BE-I64-NEXT: ldr r8, [sp, #328]
+; BE-I64-NEXT: vmov.32 d10[1], r7
+; BE-I64-NEXT: vstr d13, [sp, #32] @ 8-byte Spill
+; BE-I64-NEXT: vmov.32 d11[1], r1
+; BE-I64-NEXT: ldr r1, [sp, #120] @ 4-byte Reload
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #112] @ 4-byte Reload
+; BE-I64-NEXT: vstr d14, [sp] @ 8-byte Spill
+; BE-I64-NEXT: vstr d9, [sp, #16] @ 8-byte Spill
+; BE-I64-NEXT: vstr d12, [sp, #56] @ 8-byte Spill
+; BE-I64-NEXT: vstr d10, [sp, #64] @ 8-byte Spill
+; BE-I64-NEXT: vstr d8, [sp, #40] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #268
+; BE-I64-NEXT: mov r11, r1
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: mov r0, r6
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #316
+; BE-I64-NEXT: mov r10, r1
+; BE-I64-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEXT: mov r0, r5
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #332
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: mov r0, r8
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #348
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: mov r0, r4
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #364
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #360]
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #476
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #472]
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #88] @ 4-byte Reload
+; BE-I64-NEXT: ldr r2, [sp, #592]
+; BE-I64-NEXT: vldr d20, [sp, #136] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: ldr r1, [sp, #588]
+; BE-I64-NEXT: ldr r3, [sp, #596]
+; BE-I64-NEXT: vldr d22, [sp, #24] @ 8-byte Reload
+; BE-I64-NEXT: vldr d18, [sp, #8] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d21, d20
+; BE-I64-NEXT: vmov.32 d10[1], r6
+; BE-I64-NEXT: ldr r6, [sp, #600]
+; BE-I64-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEXT: ldr r4, [sp, #616]
+; BE-I64-NEXT: vmov.32 d12[1], r7
+; BE-I64-NEXT: ldr r7, [sp, #604]
+; BE-I64-NEXT: vmov.32 d8[1], r10
+; BE-I64-NEXT: add r10, r9, #192
+; BE-I64-NEXT: vmov.32 d14[1], r11
+; BE-I64-NEXT: ldr r11, [sp, #440]
+; BE-I64-NEXT: vmov.32 d13[1], r0
+; BE-I64-NEXT: ldr r0, [sp, #584]
+; BE-I64-NEXT: vmov.32 d15[1], r5
+; BE-I64-NEXT: vstr d16, [sp, #48] @ 8-byte Spill
+; BE-I64-NEXT: vldr d16, [sp, #128] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d20, d22
+; BE-I64-NEXT: vldr d22, [sp] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d19, d18
+; BE-I64-NEXT: vrev64.32 d17, d16
+; BE-I64-NEXT: vrev64.32 d18, d22
+; BE-I64-NEXT: vstr d10, [sp, #120] @ 8-byte Spill
+; BE-I64-NEXT: vstr d9, [sp, #112] @ 8-byte Spill
+; BE-I64-NEXT: vstr d15, [sp, #104] @ 8-byte Spill
+; BE-I64-NEXT: vstr d12, [sp, #96] @ 8-byte Spill
+; BE-I64-NEXT: vstr d8, [sp, #80] @ 8-byte Spill
+; BE-I64-NEXT: vstr d14, [sp, #72] @ 8-byte Spill
+; BE-I64-NEXT: vstr d13, [sp, #88] @ 8-byte Spill
+; BE-I64-NEXT: vst1.64 {d20, d21}, [r10:128]!
+; BE-I64-NEXT: vrev64.32 d16, d11
+; BE-I64-NEXT: vst1.64 {d18, d19}, [r10:128]!
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r10:128]!
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: ldr r2, [sp, #608]
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: ldr r3, [sp, #612]
+; BE-I64-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEXT: mov r0, r6
+; BE-I64-NEXT: mov r1, r7
+; BE-I64-NEXT: ldr r5, [sp, #456]
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #620
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: mov r0, r4
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #444
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: mov r0, r11
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #460
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: mov r0, r5
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #572
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #568]
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: vldr d16, [sp, #16] @ 8-byte Reload
+; BE-I64-NEXT: vldr d18, [sp, #56] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d17, d16
+; BE-I64-NEXT: ldr r2, [sp, #304]
+; BE-I64-NEXT: vrev64.32 d16, d18
+; BE-I64-NEXT: ldr r3, [sp, #308]
+; BE-I64-NEXT: vldr d18, [sp, #144] @ 8-byte Reload
+; BE-I64-NEXT: vldr d20, [sp, #64] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d19, d18
+; BE-I64-NEXT: vrev64.32 d18, d20
+; BE-I64-NEXT: vldr d20, [sp, #40] @ 8-byte Reload
+; BE-I64-NEXT: vldr d22, [sp, #32] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #296]
+; BE-I64-NEXT: vmov.32 d10[1], r7
+; BE-I64-NEXT: ldr r7, [sp, #412]
+; BE-I64-NEXT: vmov.32 d9[1], r6
+; BE-I64-NEXT: ldr r6, [sp, #408]
+; BE-I64-NEXT: vmov.32 d8[1], r8
+; BE-I64-NEXT: add r8, r9, #128
+; BE-I64-NEXT: vrev64.32 d21, d20
+; BE-I64-NEXT: vmov.32 d13[1], r5
+; BE-I64-NEXT: ldr r5, [sp, #300]
+; BE-I64-NEXT: vrev64.32 d20, d22
+; BE-I64-NEXT: vmov.32 d14[1], r1
+; BE-I64-NEXT: mov r1, r5
+; BE-I64-NEXT: vstr d10, [sp, #136] @ 8-byte Spill
+; BE-I64-NEXT: vstr d9, [sp, #128] @ 8-byte Spill
+; BE-I64-NEXT: vstr d8, [sp, #24] @ 8-byte Spill
+; BE-I64-NEXT: vst1.64 {d20, d21}, [r10:128]
+; BE-I64-NEXT: vst1.64 {d18, d19}, [r8:128]!
+; BE-I64-NEXT: vmov.32 d11[1], r4
+; BE-I64-NEXT: ldr r4, [sp, #424]
+; BE-I64-NEXT: ldr r10, [sp, #376]
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r8:128]!
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: ldr r2, [sp, #416]
+; BE-I64-NEXT: mov r11, r1
+; BE-I64-NEXT: ldr r3, [sp, #420]
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: mov r0, r6
+; BE-I64-NEXT: mov r1, r7
+; BE-I64-NEXT: ldr r5, [sp, #392]
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #428
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEXT: mov r0, r4
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #380
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: mov r0, r10
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #396
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: mov r0, r5
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: add r3, sp, #284
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #280]
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEXT: bl lrintl
+; BE-I64-NEXT: vldr d16, [sp, #120] @ 8-byte Reload
+; BE-I64-NEXT: vldr d18, [sp, #112] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d17, d16
+; BE-I64-NEXT: vldr d26, [sp, #136] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d16, d18
+; BE-I64-NEXT: vldr d18, [sp, #104] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d31, d26
+; BE-I64-NEXT: vldr d26, [sp, #128] @ 8-byte Reload
+; BE-I64-NEXT: vldr d20, [sp, #96] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d19, d18
+; BE-I64-NEXT: vrev64.32 d18, d20
+; BE-I64-NEXT: vldr d20, [sp, #80] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d30, d26
+; BE-I64-NEXT: vldr d26, [sp, #24] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d10[1], r5
+; BE-I64-NEXT: vldr d22, [sp, #72] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d21, d20
+; BE-I64-NEXT: vrev64.32 d1, d26
+; BE-I64-NEXT: vmov.32 d9[1], r7
+; BE-I64-NEXT: vmov.32 d12[1], r4
+; BE-I64-NEXT: vrev64.32 d20, d22
+; BE-I64-NEXT: vldr d22, [sp, #88] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d8[1], r6
+; BE-I64-NEXT: vrev64.32 d0, d14
+; BE-I64-NEXT: vmov.32 d28[0], r0
+; BE-I64-NEXT: add r0, r9, #64
+; BE-I64-NEXT: vrev64.32 d3, d10
+; BE-I64-NEXT: vldr d24, [sp, #48] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d23, d22
+; BE-I64-NEXT: vrev64.32 d5, d9
+; BE-I64-NEXT: vst1.64 {d0, d1}, [r8:128]!
+; BE-I64-NEXT: vrev64.32 d2, d12
+; BE-I64-NEXT: vmov.32 d15[1], r11
+; BE-I64-NEXT: vrev64.32 d22, d24
+; BE-I64-NEXT: vrev64.32 d25, d13
+; BE-I64-NEXT: vrev64.32 d4, d8
+; BE-I64-NEXT: vst1.64 {d30, d31}, [r8:128]
+; BE-I64-NEXT: vst1.64 {d2, d3}, [r0:128]!
+; BE-I64-NEXT: vmov.32 d28[1], r1
+; BE-I64-NEXT: vrev64.32 d24, d11
+; BE-I64-NEXT: vst1.64 {d4, d5}, [r0:128]!
+; BE-I64-NEXT: vrev64.32 d27, d15
+; BE-I64-NEXT: vst1.64 {d24, d25}, [r0:128]!
+; BE-I64-NEXT: vrev64.32 d26, d28
+; BE-I64-NEXT: vst1.64 {d22, d23}, [r0:128]
+; BE-I64-NEXT: vst1.64 {d20, d21}, [r9:128]!
+; BE-I64-NEXT: vst1.64 {d26, d27}, [r9:128]!
+; BE-I64-NEXT: vst1.64 {d18, d19}, [r9:128]!
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r9:128]
+; BE-I64-NEXT: add sp, sp, #152
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: add sp, sp, #4
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v32fp128:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEON-NEXT: .pad #4
+; BE-I32-NEON-NEXT: sub sp, sp, #4
+; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: .pad #104
+; BE-I32-NEON-NEXT: sub sp, sp, #104
+; BE-I32-NEON-NEXT: mov r4, r3
+; BE-I32-NEON-NEXT: add r3, sp, #248
+; BE-I32-NEON-NEXT: mov r8, r2
+; BE-I32-NEON-NEXT: mov r11, r0
+; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #616
+; BE-I32-NEON-NEXT: mov r9, r0
+; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #680
+; BE-I32-NEON-NEXT: mov r5, r0
+; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: ldr r7, [sp, #232]
+; BE-I32-NEON-NEXT: add lr, sp, #72
+; BE-I32-NEON-NEXT: ldr r1, [sp, #236]
+; BE-I32-NEON-NEXT: vmov.32 d17[0], r0
+; BE-I32-NEON-NEXT: ldr r2, [sp, #240]
+; BE-I32-NEON-NEXT: ldr r3, [sp, #244]
+; BE-I32-NEON-NEXT: mov r0, r7
+; BE-I32-NEON-NEXT: ldr r10, [sp, #376]
+; BE-I32-NEON-NEXT: vmov.32 d11[0], r5
+; BE-I32-NEON-NEXT: ldr r6, [sp, #296]
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #300
+; BE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT: mov r0, r6
+; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #380
+; BE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT: mov r0, r10
+; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #360
+; BE-I32-NEON-NEXT: mov r5, r0
+; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: vmov.32 d17[0], r0
+; BE-I32-NEON-NEXT: ldr r6, [sp, #312]
+; BE-I32-NEON-NEXT: ldr r1, [sp, #316]
+; BE-I32-NEON-NEXT: ldr r2, [sp, #320]
+; BE-I32-NEON-NEXT: ldr r3, [sp, #324]
+; BE-I32-NEON-NEXT: vmov.32 d17[1], r5
+; BE-I32-NEON-NEXT: mov r0, r6
+; BE-I32-NEON-NEXT: ldr r7, [sp, #572]
+; BE-I32-NEON-NEXT: vorr q4, q8, q8
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: ldr r6, [sp, #632]
+; BE-I32-NEON-NEXT: add lr, sp, #88
+; BE-I32-NEON-NEXT: ldr r1, [sp, #636]
+; BE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT: ldr r2, [sp, #640]
+; BE-I32-NEON-NEXT: ldr r3, [sp, #644]
+; BE-I32-NEON-NEXT: mov r0, r6
+; BE-I32-NEON-NEXT: ldr r5, [sp, #576]
+; BE-I32-NEON-NEXT: vmov.32 d15[1], r9
+; BE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT: ldr r3, [sp, #580]
+; BE-I32-NEON-NEXT: ldr r0, [sp, #568]
+; BE-I32-NEON-NEXT: mov r1, r7
+; BE-I32-NEON-NEXT: mov r2, r5
+; BE-I32-NEON-NEXT: vorr q6, q5, q5
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #552
+; BE-I32-NEON-NEXT: mov r9, r0
+; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #520
+; BE-I32-NEON-NEXT: mov r5, r0
+; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: ldr r6, [sp, #584]
+; BE-I32-NEON-NEXT: add lr, sp, #8
+; BE-I32-NEON-NEXT: ldr r1, [sp, #588]
+; BE-I32-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I32-NEON-NEXT: ldr r2, [sp, #592]
+; BE-I32-NEON-NEXT: ldr r3, [sp, #596]
+; BE-I32-NEON-NEXT: mov r0, r6
+; BE-I32-NEON-NEXT: vmov.32 d17[0], r5
+; BE-I32-NEON-NEXT: ldr r7, [sp, #216]
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #220
+; BE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT: mov r0, r7
+; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: ldr r2, [sp, #208]
+; BE-I32-NEON-NEXT: mov r7, r0
+; BE-I32-NEON-NEXT: ldr r3, [sp, #212]
+; BE-I32-NEON-NEXT: mov r0, r8
+; BE-I32-NEON-NEXT: mov r1, r4
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #456
+; BE-I32-NEON-NEXT: mov r5, r0
+; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: ldr r6, [sp, #328]
+; BE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT: ldr r1, [sp, #332]
+; BE-I32-NEON-NEXT: ldr r2, [sp, #336]
+; BE-I32-NEON-NEXT: vmov.32 d14[0], r5
+; BE-I32-NEON-NEXT: ldr r3, [sp, #340]
+; BE-I32-NEON-NEXT: mov r0, r6
+; BE-I32-NEON-NEXT: ldr r10, [sp, #504]
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: ldr r6, [sp, #344]
+; BE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT: ldr r1, [sp, #348]
+; BE-I32-NEON-NEXT: ldr r2, [sp, #352]
+; BE-I32-NEON-NEXT: ldr r3, [sp, #356]
+; BE-I32-NEON-NEXT: mov r0, r6
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT: ldr r6, [sp, #600]
+; BE-I32-NEON-NEXT: add lr, sp, #56
+; BE-I32-NEON-NEXT: ldr r1, [sp, #604]
+; BE-I32-NEON-NEXT: vmov.32 d14[1], r7
+; BE-I32-NEON-NEXT: ldr r2, [sp, #608]
+; BE-I32-NEON-NEXT: ldr r3, [sp, #612]
+; BE-I32-NEON-NEXT: mov r0, r6
+; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #40
+; BE-I32-NEON-NEXT: ldr r5, [sp, #508]
+; BE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEON-NEXT: add lr, sp, #24
+; BE-I32-NEON-NEXT: ldr r7, [sp, #536]
+; BE-I32-NEON-NEXT: ldr r1, [sp, #540]
+; BE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #8
+; BE-I32-NEON-NEXT: mov r0, r7
+; BE-I32-NEON-NEXT: ldr r2, [sp, #544]
+; BE-I32-NEON-NEXT: ldr r3, [sp, #548]
+; BE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEON-NEXT: ldr r6, [sp, #512]
+; BE-I32-NEON-NEXT: vmov.32 d13[1], r9
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: ldr r3, [sp, #516]
+; BE-I32-NEON-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEON-NEXT: mov r0, r10
+; BE-I32-NEON-NEXT: mov r1, r5
+; BE-I32-NEON-NEXT: mov r2, r6
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #488
+; BE-I32-NEON-NEXT: mov r5, r0
+; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #424
+; BE-I32-NEON-NEXT: mov r7, r0
+; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: ldr r6, [sp, #264]
+; BE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT: ldr r1, [sp, #268]
+; BE-I32-NEON-NEXT: ldr r2, [sp, #272]
+; BE-I32-NEON-NEXT: vmov.32 d11[0], r7
+; BE-I32-NEON-NEXT: ldr r3, [sp, #276]
+; BE-I32-NEON-NEXT: mov r0, r6
+; BE-I32-NEON-NEXT: ldr r8, [sp, #696]
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add lr, sp, #88
+; BE-I32-NEON-NEXT: ldr r4, [sp, #472]
+; BE-I32-NEON-NEXT: ldr r1, [sp, #476]
+; BE-I32-NEON-NEXT: vmov.32 d11[1], r5
+; BE-I32-NEON-NEXT: ldr r2, [sp, #480]
+; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT: ldr r3, [sp, #484]
+; BE-I32-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I32-NEON-NEXT: mov r0, r4
+; BE-I32-NEON-NEXT: ldr r6, [sp, #700]
+; BE-I32-NEON-NEXT: ldr r7, [sp, #704]
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: ldr r3, [sp, #708]
+; BE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT: mov r0, r8
+; BE-I32-NEON-NEXT: mov r1, r6
+; BE-I32-NEON-NEXT: mov r2, r7
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #648
+; BE-I32-NEON-NEXT: mov r4, r0
+; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add lr, sp, #72
+; BE-I32-NEON-NEXT: ldr r5, [sp, #664]
+; BE-I32-NEON-NEXT: ldr r1, [sp, #668]
+; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEON-NEXT: ldr r2, [sp, #672]
+; BE-I32-NEON-NEXT: ldr r3, [sp, #676]
+; BE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT: mov r0, r5
+; BE-I32-NEON-NEXT: ldr r6, [sp, #444]
+; BE-I32-NEON-NEXT: vmov.32 d9[1], r4
+; BE-I32-NEON-NEXT: ldr r7, [sp, #448]
+; BE-I32-NEON-NEXT: ldr r8, [sp, #412]
+; BE-I32-NEON-NEXT: ldr r4, [sp, #416]
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT: ldr r3, [sp, #452]
+; BE-I32-NEON-NEXT: ldr r0, [sp, #440]
+; BE-I32-NEON-NEXT: mov r1, r6
+; BE-I32-NEON-NEXT: mov r2, r7
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEON-NEXT: ldr r3, [sp, #420]
+; BE-I32-NEON-NEXT: ldr r0, [sp, #408]
+; BE-I32-NEON-NEXT: mov r1, r8
+; BE-I32-NEON-NEXT: mov r2, r4
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #392
+; BE-I32-NEON-NEXT: mov r4, r0
+; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add r3, sp, #284
+; BE-I32-NEON-NEXT: ldr r7, [sp, #280]
+; BE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT: mov r0, r7
+; BE-I32-NEON-NEXT: vmov.32 d14[1], r4
+; BE-I32-NEON-NEXT: bl lrintl
+; BE-I32-NEON-NEXT: add lr, sp, #88
+; BE-I32-NEON-NEXT: vrev64.32 q9, q4
+; BE-I32-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #24
+; BE-I32-NEON-NEXT: vrev64.32 q8, q7
+; BE-I32-NEON-NEXT: vmov.32 d20[1], r0
+; BE-I32-NEON-NEXT: add r0, r11, #64
+; BE-I32-NEON-NEXT: vst1.32 {d10, d11}, [r0:128]!
+; BE-I32-NEON-NEXT: vst1.32 {d12, d13}, [r0:128]!
+; BE-I32-NEON-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #40
+; BE-I32-NEON-NEXT: vst1.32 {d22, d23}, [r0:128]!
+; BE-I32-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]
+; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #56
+; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r11:128]!
+; BE-I32-NEON-NEXT: vst1.32 {d20, d21}, [r11:128]!
+; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r11:128]!
+; BE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]
+; BE-I32-NEON-NEXT: add sp, sp, #104
+; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: add sp, sp, #4
+; BE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v32fp128:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: .pad #4
+; BE-I64-NEON-NEXT: sub sp, sp, #4
+; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: .pad #152
+; BE-I64-NEON-NEXT: sub sp, sp, #152
+; BE-I64-NEON-NEXT: str r3, [sp, #120] @ 4-byte Spill
+; BE-I64-NEON-NEXT: add r3, sp, #712
+; BE-I64-NEON-NEXT: str r2, [sp, #112] @ 4-byte Spill
+; BE-I64-NEON-NEXT: mov r9, r0
+; BE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: ldr r7, [sp, #648]
+; BE-I64-NEON-NEXT: add r3, sp, #652
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: mov r0, r7
+; BE-I64-NEON-NEXT: ldr r6, [sp, #520]
+; BE-I64-NEON-NEXT: ldr r8, [sp, #632]
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #524
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: mov r0, r6
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #636
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT: mov r0, r8
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #488]
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r4
+; BE-I64-NEON-NEXT: ldr r1, [sp, #492]
+; BE-I64-NEON-NEXT: ldr r2, [sp, #496]
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r7
+; BE-I64-NEON-NEXT: ldr r3, [sp, #500]
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r5
+; BE-I64-NEON-NEXT: vstr d8, [sp, #144] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d10, [sp, #136] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d9, [sp, #128] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #680
+; BE-I64-NEON-NEXT: str r0, [sp, #104] @ 4-byte Spill
+; BE-I64-NEON-NEXT: str r1, [sp, #88] @ 4-byte Spill
+; BE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #728]
+; BE-I64-NEON-NEXT: ldr r2, [sp, #736]
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r6
+; BE-I64-NEON-NEXT: ldr r6, [sp, #732]
+; BE-I64-NEON-NEXT: ldr r3, [sp, #740]
+; BE-I64-NEON-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT: ldr r5, [sp, #504]
+; BE-I64-NEON-NEXT: mov r1, r6
+; BE-I64-NEON-NEXT: ldr r7, [sp, #744]
+; BE-I64-NEON-NEXT: ldr r4, [sp, #748]
+; BE-I64-NEON-NEXT: vstr d11, [sp, #24] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d16, [sp, #8] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: ldr r2, [sp, #752]
+; BE-I64-NEON-NEXT: mov r11, r1
+; BE-I64-NEON-NEXT: ldr r3, [sp, #756]
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: mov r0, r7
+; BE-I64-NEON-NEXT: mov r1, r4
+; BE-I64-NEON-NEXT: ldr r10, [sp, #552]
+; BE-I64-NEON-NEXT: ldr r6, [sp, #664]
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #508
+; BE-I64-NEON-NEXT: mov r8, r1
+; BE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT: mov r0, r5
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #540
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #536]
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #556
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: mov r0, r10
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #668
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT: mov r0, r6
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #700
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #696]
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #104] @ 4-byte Reload
+; BE-I64-NEON-NEXT: ldr r2, [sp, #256]
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r11
+; BE-I64-NEON-NEXT: ldr r3, [sp, #260]
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r6
+; BE-I64-NEON-NEXT: ldr r6, [sp, #264]
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEON-NEXT: ldr r4, [sp, #344]
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r5
+; BE-I64-NEON-NEXT: ldr r5, [sp, #312]
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r8
+; BE-I64-NEON-NEXT: ldr r8, [sp, #328]
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r7
+; BE-I64-NEON-NEXT: vstr d13, [sp, #32] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r1
+; BE-I64-NEON-NEXT: ldr r1, [sp, #120] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #112] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vstr d14, [sp] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d9, [sp, #16] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d12, [sp, #56] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d10, [sp, #64] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d8, [sp, #40] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #268
+; BE-I64-NEON-NEXT: mov r11, r1
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: mov r0, r6
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #316
+; BE-I64-NEON-NEXT: mov r10, r1
+; BE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT: mov r0, r5
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #332
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: mov r0, r8
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #348
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: mov r0, r4
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #364
+; BE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #360]
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #476
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #472]
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #88] @ 4-byte Reload
+; BE-I64-NEON-NEXT: ldr r2, [sp, #592]
+; BE-I64-NEON-NEXT: vldr d20, [sp, #136] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT: ldr r1, [sp, #588]
+; BE-I64-NEON-NEXT: ldr r3, [sp, #596]
+; BE-I64-NEON-NEXT: vldr d22, [sp, #24] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vldr d18, [sp, #8] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d21, d20
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r6
+; BE-I64-NEON-NEXT: ldr r6, [sp, #600]
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEON-NEXT: ldr r4, [sp, #616]
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r7
+; BE-I64-NEON-NEXT: ldr r7, [sp, #604]
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r10
+; BE-I64-NEON-NEXT: add r10, r9, #192
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r11
+; BE-I64-NEON-NEXT: ldr r11, [sp, #440]
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #584]
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r5
+; BE-I64-NEON-NEXT: vstr d16, [sp, #48] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vldr d16, [sp, #128] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d20, d22
+; BE-I64-NEON-NEXT: vldr d22, [sp] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d19, d18
+; BE-I64-NEON-NEXT: vrev64.32 d17, d16
+; BE-I64-NEON-NEXT: vrev64.32 d18, d22
+; BE-I64-NEON-NEXT: vstr d10, [sp, #120] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d9, [sp, #112] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d15, [sp, #104] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d12, [sp, #96] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d8, [sp, #80] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d14, [sp, #72] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d13, [sp, #88] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r10:128]!
+; BE-I64-NEON-NEXT: vrev64.32 d16, d11
+; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r10:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]!
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: ldr r2, [sp, #608]
+; BE-I64-NEON-NEXT: mov r8, r1
+; BE-I64-NEON-NEXT: ldr r3, [sp, #612]
+; BE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT: mov r0, r6
+; BE-I64-NEON-NEXT: mov r1, r7
+; BE-I64-NEON-NEXT: ldr r5, [sp, #456]
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #620
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT: mov r0, r4
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #444
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: mov r0, r11
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #460
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: mov r0, r5
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #572
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #568]
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: vldr d16, [sp, #16] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vldr d18, [sp, #56] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d17, d16
+; BE-I64-NEON-NEXT: ldr r2, [sp, #304]
+; BE-I64-NEON-NEXT: vrev64.32 d16, d18
+; BE-I64-NEON-NEXT: ldr r3, [sp, #308]
+; BE-I64-NEON-NEXT: vldr d18, [sp, #144] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vldr d20, [sp, #64] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d19, d18
+; BE-I64-NEON-NEXT: vrev64.32 d18, d20
+; BE-I64-NEON-NEXT: vldr d20, [sp, #40] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vldr d22, [sp, #32] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #296]
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r7
+; BE-I64-NEON-NEXT: ldr r7, [sp, #412]
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r6
+; BE-I64-NEON-NEXT: ldr r6, [sp, #408]
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r8
+; BE-I64-NEON-NEXT: add r8, r9, #128
+; BE-I64-NEON-NEXT: vrev64.32 d21, d20
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r5
+; BE-I64-NEON-NEXT: ldr r5, [sp, #300]
+; BE-I64-NEON-NEXT: vrev64.32 d20, d22
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r1
+; BE-I64-NEON-NEXT: mov r1, r5
+; BE-I64-NEON-NEXT: vstr d10, [sp, #136] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d9, [sp, #128] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d8, [sp, #24] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r10:128]
+; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r8:128]!
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT: ldr r4, [sp, #424]
+; BE-I64-NEON-NEXT: ldr r10, [sp, #376]
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]!
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: ldr r2, [sp, #416]
+; BE-I64-NEON-NEXT: mov r11, r1
+; BE-I64-NEON-NEXT: ldr r3, [sp, #420]
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: mov r0, r6
+; BE-I64-NEON-NEXT: mov r1, r7
+; BE-I64-NEON-NEXT: ldr r5, [sp, #392]
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #428
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT: mov r0, r4
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #380
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT: mov r0, r10
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #396
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: mov r0, r5
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: add r3, sp, #284
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #280]
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT: bl lrintl
+; BE-I64-NEON-NEXT: vldr d16, [sp, #120] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vldr d18, [sp, #112] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d17, d16
+; BE-I64-NEON-NEXT: vldr d26, [sp, #136] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d16, d18
+; BE-I64-NEON-NEXT: vldr d18, [sp, #104] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d31, d26
+; BE-I64-NEON-NEXT: vldr d26, [sp, #128] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vldr d20, [sp, #96] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d19, d18
+; BE-I64-NEON-NEXT: vrev64.32 d18, d20
+; BE-I64-NEON-NEXT: vldr d20, [sp, #80] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d30, d26
+; BE-I64-NEON-NEXT: vldr d26, [sp, #24] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r5
+; BE-I64-NEON-NEXT: vldr d22, [sp, #72] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d21, d20
+; BE-I64-NEON-NEXT: vrev64.32 d1, d26
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r7
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r4
+; BE-I64-NEON-NEXT: vrev64.32 d20, d22
+; BE-I64-NEON-NEXT: vldr d22, [sp, #88] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r6
+; BE-I64-NEON-NEXT: vrev64.32 d0, d14
+; BE-I64-NEON-NEXT: vmov.32 d28[0], r0
+; BE-I64-NEON-NEXT: add r0, r9, #64
+; BE-I64-NEON-NEXT: vrev64.32 d3, d10
+; BE-I64-NEON-NEXT: vldr d24, [sp, #48] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d23, d22
+; BE-I64-NEON-NEXT: vrev64.32 d5, d9
+; BE-I64-NEON-NEXT: vst1.64 {d0, d1}, [r8:128]!
+; BE-I64-NEON-NEXT: vrev64.32 d2, d12
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r11
+; BE-I64-NEON-NEXT: vrev64.32 d22, d24
+; BE-I64-NEON-NEXT: vrev64.32 d25, d13
+; BE-I64-NEON-NEXT: vrev64.32 d4, d8
+; BE-I64-NEON-NEXT: vst1.64 {d30, d31}, [r8:128]
+; BE-I64-NEON-NEXT: vst1.64 {d2, d3}, [r0:128]!
+; BE-I64-NEON-NEXT: vmov.32 d28[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 d24, d11
+; BE-I64-NEON-NEXT: vst1.64 {d4, d5}, [r0:128]!
+; BE-I64-NEON-NEXT: vrev64.32 d27, d15
+; BE-I64-NEON-NEXT: vst1.64 {d24, d25}, [r0:128]!
+; BE-I64-NEON-NEXT: vrev64.32 d26, d28
+; BE-I64-NEON-NEXT: vst1.64 {d22, d23}, [r0:128]
+; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r9:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d26, d27}, [r9:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r9:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r9:128]
+; BE-I64-NEON-NEXT: add sp, sp, #152
+; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: add sp, sp, #4
+; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v16fp128(<32 x fp128> %x)
+ ret <32 x iXLen> %a
+}
+declare <32 x iXLen> @llvm.lrint.v32iXLen.v32fp128(<32 x fp128>)
diff --git a/llvm/test/CodeGen/AVR/llrint.ll b/llvm/test/CodeGen/AVR/llrint.ll
index 32b4c7ab12a4b..c55664f2d7353 100644
--- a/llvm/test/CodeGen/AVR/llrint.ll
+++ b/llvm/test/CodeGen/AVR/llrint.ll
@@ -1,6 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=avr -mcpu=atmega328p | FileCheck %s
+; FIXME: crash "Input type needs to be promoted!"
+; define i64 @testmsxh_builtin(half %x) {
+; entry:
+; %0 = tail call i64 @llvm.llrint.f16(half %x)
+; ret i64 %0
+; }
+
define i64 @testmsxs_builtin(float %x) {
; CHECK-LABEL: testmsxs_builtin:
; CHECK: ; %bb.0: ; %entry
@@ -21,5 +28,16 @@ entry:
ret i64 %0
}
+; FIXME(#44744): incorrect libcall
+define i64 @testmsxq_builtin(fp128 %x) {
+; CHECK-LABEL: testmsxq_builtin:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: call llrintl
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i64 @llvm.llrint.fp128(fp128 %x)
+ ret i64 %0
+}
+
declare i64 @llvm.llrint.f32(float) nounwind readnone
declare i64 @llvm.llrint.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/AVR/lrint.ll b/llvm/test/CodeGen/AVR/lrint.ll
index d7568305f7b51..4ef656060bd10 100644
--- a/llvm/test/CodeGen/AVR/lrint.ll
+++ b/llvm/test/CodeGen/AVR/lrint.ll
@@ -1,6 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=avr -mcpu=atmega328p | FileCheck %s
+; FIXME: crash "Input type needs to be promoted!"
+; define i32 @testmswh_builtin(half %x) {
+; entry:
+; %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+; ret i32 %0
+; }
+
define i32 @testmsws_builtin(float %x) {
; CHECK-LABEL: testmsws_builtin:
; CHECK: ; %bb.0: ; %entry
@@ -21,5 +28,16 @@ entry:
ret i32 %0
}
+; FIXME(#44744): incorrect libcall
+define i32 @testmswq_builtin(fp128 %x) {
+; CHECK-LABEL: testmswq_builtin:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: call lrint
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.lrint.i32.fp128(fp128 %x)
+ ret i32 %0
+}
+
declare i32 @llvm.lrint.i32.f32(float) nounwind readnone
declare i32 @llvm.lrint.i32.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/LoongArch/lrint-conv.ll b/llvm/test/CodeGen/LoongArch/lrint-conv.ll
new file mode 100644
index 0000000000000..85de820025614
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lrint-conv.ll
@@ -0,0 +1,96 @@
+; Tests for lrint and llrint, with both i32 and i64 checked.
+
+; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=loongarch32 | FileCheck %s --check-prefixes=LA32
+; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=loongarch32 | FileCheck %s --check-prefixes=LA32
+; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=loongarch64 | FileCheck %s --check-prefixes=LA64-I32
+; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=loongarch64 | FileCheck %s --check-prefixes=LA64-I64
+
+; FIXME: crash
+; define ITy @test_lrint_ixx_f16(half %x) nounwind {
+; %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
+; ret ITy %res
+; }
+
+; define ITy @test_llrint_ixx_f16(half %x) nounwind {
+; %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
+; ret ITy %res
+; }
+
+define ITy @test_lrint_ixx_f32(float %x) nounwind {
+; LA32-LABEL: test_lrint_ixx_f32:
+; LA32: bl lrintf
+;
+; LA64-I32-LABEL: test_lrint_ixx_f32:
+; LA64-I32: pcaddu18i $ra, %call36(lrintf)
+;
+; LA64-I64-LABEL: test_lrint_ixx_f32:
+; LA64-I64: pcaddu18i $t8, %call36(lrintf)
+ %res = tail call ITy @llvm.lrint.ITy.f32(float %x)
+ ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f32(float %x) nounwind {
+; LA32-LABEL: test_llrint_ixx_f32:
+; LA32: bl llrintf
+;
+; LA64-I32-LABEL: test_llrint_ixx_f32:
+; LA64-I32: pcaddu18i $ra, %call36(llrintf)
+;
+; LA64-I64-LABEL: test_llrint_ixx_f32:
+; LA64-I64: pcaddu18i $t8, %call36(llrintf)
+ %res = tail call ITy @llvm.llrint.ITy.f32(float %x)
+ ret ITy %res
+}
+
+define ITy @test_lrint_ixx_f64(double %x) nounwind {
+; LA32-LABEL: test_lrint_ixx_f64:
+; LA32: bl lrint
+;
+; LA64-I32-LABEL: test_lrint_ixx_f64:
+; LA64-I32: pcaddu18i $ra, %call36(lrint)
+;
+; LA64-I64-LABEL: test_lrint_ixx_f64:
+; LA64-I64: pcaddu18i $t8, %call36(lrint)
+ %res = tail call ITy @llvm.lrint.ITy.f64(double %x)
+ ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f64(double %x) nounwind {
+; LA32-LABEL: test_llrint_ixx_f64:
+; LA32: bl llrint
+;
+; LA64-I32-LABEL: test_llrint_ixx_f64:
+; LA64-I32: pcaddu18i $ra, %call36(llrint)
+;
+; LA64-I64-LABEL: test_llrint_ixx_f64:
+; LA64-I64: pcaddu18i $t8, %call36(llrint)
+ %res = tail call ITy @llvm.llrint.ITy.f64(double %x)
+ ret ITy %res
+}
+
+; FIXME(#44744): incorrect libcall on loongarch32
+define ITy @test_lrint_ixx_f128(fp128 %x) nounwind {
+; LA32-LABEL: test_lrint_ixx_f128:
+; LA32: bl lrintl
+;
+; LA64-I32-LABEL: test_lrint_ixx_f128:
+; LA64-I32: pcaddu18i $ra, %call36(lrintl)
+;
+; LA64-I64-LABEL: test_lrint_ixx_f128:
+; LA64-I64: pcaddu18i $ra, %call36(lrintl)
+ %res = tail call ITy @llvm.lrint.ITy.f128(fp128 %x)
+ ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f128(fp128 %x) nounwind {
+; LA32-LABEL: test_llrint_ixx_f128:
+; LA32: bl llrintl
+;
+; LA64-I32-LABEL: test_llrint_ixx_f128:
+; LA64-I32: pcaddu18i $ra, %call36(llrintl)
+;
+; LA64-I64-LABEL: test_llrint_ixx_f128:
+; LA64-I64: pcaddu18i $ra, %call36(llrintl)
+ %res = tail call ITy @llvm.llrint.ITy.f128(fp128 %x)
+ ret ITy %res
+}
diff --git a/llvm/test/CodeGen/MSP430/lrint-conv.ll b/llvm/test/CodeGen/MSP430/lrint-conv.ll
new file mode 100644
index 0000000000000..04ab2af6102a0
--- /dev/null
+++ b/llvm/test/CodeGen/MSP430/lrint-conv.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; Tests for lrint and llrint, with both i32 and i64 checked.
+
+; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=msp430-unknown-unknown | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=msp430-unknown-unknown | FileCheck %s --check-prefixes=CHECK
+
+; FIXME: crash "Input type needs to be promoted!"
+; define ITy @test_lrint_ixx_f16(half %x) nounwind {
+; %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
+; ret ITy %res
+; }
+
+; define ITy @test_llrint_ixx_f16(half %x) nounwind {
+; %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
+; ret ITy %res
+; }
+
+define ITy @test_lrint_ixx_f32(float %x) nounwind {
+; CHECK-LABEL: test_lrint_ixx_f32:
+; CHECK: call #lrintf
+ %res = tail call ITy @llvm.lrint.ITy.f32(float %x)
+ ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f32(float %x) nounwind {
+; CHECK-LABEL: test_llrint_ixx_f32:
+; CHECK: call #llrintf
+ %res = tail call ITy @llvm.llrint.ITy.f32(float %x)
+ ret ITy %res
+}
+
+define ITy @test_lrint_ixx_f64(double %x) nounwind {
+; CHECK-LABEL: test_lrint_ixx_f64:
+; CHECK: call #lrint
+ %res = tail call ITy @llvm.lrint.ITy.f64(double %x)
+ ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f64(double %x) nounwind {
+; CHECK-LABEL: test_llrint_ixx_f64:
+; CHECK: call #llrint
+ %res = tail call ITy @llvm.llrint.ITy.f64(double %x)
+ ret ITy %res
+}
+
+; FIXME(#44744): incorrect libcall
+define ITy @test_lrint_ixx_f128(fp128 %x) nounwind {
+; CHECK-LABEL: test_lrint_ixx_f128:
+; CHECK: call #lrintl
+ %res = tail call ITy @llvm.lrint.ITy.f128(fp128 %x)
+ ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f128(fp128 %x) nounwind {
+; CHECK-LABEL: test_llrint_ixx_f128:
+; CHECK: call #llrintl
+ %res = tail call ITy @llvm.llrint.ITy.f128(fp128 %x)
+ ret ITy %res
+}
diff --git a/llvm/test/CodeGen/Mips/llrint-conv.ll b/llvm/test/CodeGen/Mips/llrint-conv.ll
index dcb4e5657e80b..ee3c0d99253a6 100644
--- a/llvm/test/CodeGen/Mips/llrint-conv.ll
+++ b/llvm/test/CodeGen/Mips/llrint-conv.ll
@@ -1,4 +1,19 @@
; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s
+; RUN: llc < %s -mtriple=mips -mattr=+soft-float | FileCheck %s
+
+; FIXME: crash
+; define signext i32 @testmswh(half %x) {
+; entry:
+; %0 = tail call i64 @llvm.llrint.f16(half %x)
+; %conv = trunc i64 %0 to i32
+; ret i32 %conv
+; }
+
+; define i64 @testmsxh(half %x) {
+; entry:
+; %0 = tail call i64 @llvm.llrint.f16(half %x)
+; ret i64 %0
+; }
define signext i32 @testmsws(float %x) {
; CHECK-LABEL: testmsws:
diff --git a/llvm/test/CodeGen/Mips/lrint-conv.ll b/llvm/test/CodeGen/Mips/lrint-conv.ll
index bd3f7b3babe10..6d2e392675f1c 100644
--- a/llvm/test/CodeGen/Mips/lrint-conv.ll
+++ b/llvm/test/CodeGen/Mips/lrint-conv.ll
@@ -1,4 +1,19 @@
; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s
+; RUN: llc < %s -mtriple=mips -mattr=+soft-float | FileCheck %s
+
+; FIXME: crash
+; define signext i32 @testmswh(half %x) {
+; entry:
+; %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
+; %conv = trunc i64 %0 to i32
+; ret i32 %conv
+; }
+
+; define i64 @testmsxh(half %x) {
+; entry:
+; %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
+; ret i64 %0
+; }
define signext i32 @testmsws(float %x) {
; CHECK-LABEL: testmsws:
diff --git a/llvm/test/CodeGen/PowerPC/llrint-conv.ll b/llvm/test/CodeGen/PowerPC/llrint-conv.ll
index daadf85b4085a..dcd3bd25a83c5 100644
--- a/llvm/test/CodeGen/PowerPC/llrint-conv.ll
+++ b/llvm/test/CodeGen/PowerPC/llrint-conv.ll
@@ -1,4 +1,19 @@
; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc | FileCheck %s
+
+; FIXME: crash "Input type needs to be promoted!"
+; define signext i32 @testmswh(half %x) {
+; entry:
+; %0 = tail call i64 @llvm.llrint.f16(half %x)
+; %conv = trunc i64 %0 to i32
+; ret i32 %conv
+; }
+
+; define i64 @testmsxh(half %x) {
+; entry:
+; %0 = tail call i64 @llvm.llrint.f16(half %x)
+; ret i64 %0
+; }
; CHECK-LABEL: testmsws:
; CHECK: bl llrintf
@@ -51,6 +66,23 @@ entry:
ret i64 %0
}
+; CHECK-LABEL: testmswq:
+; CHECK: bl llrintf128
+define signext i32 @testmswq(fp128 %x) {
+entry:
+ %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
+ %conv = trunc i64 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: testmslq:
+; CHECK: bl llrintf128
+define i64 @testmslq(fp128 %x) {
+entry:
+ %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
+ ret i64 %0
+}
+
declare i64 @llvm.llrint.f32(float) nounwind readnone
declare i64 @llvm.llrint.f64(double) nounwind readnone
declare i64 @llvm.llrint.ppcf128(ppc_fp128) nounwind readnone
diff --git a/llvm/test/CodeGen/PowerPC/lrint-conv.ll b/llvm/test/CodeGen/PowerPC/lrint-conv.ll
index adfc994497323..bc77a200757f4 100644
--- a/llvm/test/CodeGen/PowerPC/lrint-conv.ll
+++ b/llvm/test/CodeGen/PowerPC/lrint-conv.ll
@@ -1,4 +1,19 @@
; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc | FileCheck %s
+
+; FIXME: crash "Input type needs to be promoted!"
+; define signext i32 @testmswh(half %x) {
+; entry:
+; %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
+; %conv = trunc i64 %0 to i32
+; ret i32 %conv
+; }
+
+; define i64 @testmsxh(half %x) {
+; entry:
+; %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
+; ret i64 %0
+; }
; CHECK-LABEL: testmsws:
; CHECK: bl lrintf
@@ -51,6 +66,23 @@ entry:
ret i64 %0
}
+; CHECK-LABEL: testmswq:
+; CHECK: bl lrintf128
+define signext i32 @testmswq(fp128 %x) {
+entry:
+ %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x)
+ %conv = trunc i64 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: testmslq:
+; CHECK: bl lrintf128
+define i64 @testmslq(fp128 %x) {
+entry:
+ %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x)
+ ret i64 %0
+}
+
declare i64 @llvm.lrint.i64.f32(float) nounwind readnone
declare i64 @llvm.lrint.i64.f64(double) nounwind readnone
declare i64 @llvm.lrint.i64.ppcf128(ppc_fp128) nounwind readnone
diff --git a/llvm/test/CodeGen/PowerPC/vector-llrint.ll b/llvm/test/CodeGen/PowerPC/vector-llrint.ll
index 9229fefced67e..8a9e48e002381 100644
--- a/llvm/test/CodeGen/PowerPC/vector-llrint.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-llrint.ll
@@ -1,4 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; FIXME: crash "Input type needs to be promoted!"
+; SKIP: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; SKIP: -mtriple=powerpc-unknown-unknown -verify-machineinstrs < %s | \
+; SKIP: FileCheck %s --check-prefix=PPC32
; RUN: llc -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mtriple=powerpc64-unknown-unknown -verify-machineinstrs < %s | \
; RUN: FileCheck %s --check-prefix=BE
@@ -9,14 +13,12 @@
; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s \
; RUN: --enable-unsafe-fp-math | FileCheck %s --check-prefix=FAST
-define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
+define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind {
; BE-LABEL: llrint_v1i64_v1f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -112(r1)
; BE-NEXT: std r0, 128(r1)
-; BE-NEXT: .cfi_def_cfa_offset 112
-; BE-NEXT: .cfi_offset lr, 16
; BE-NEXT: bl __truncsfhf2
; BE-NEXT: nop
; BE-NEXT: clrldi r3, r3, 48
@@ -34,8 +36,6 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: std r0, 48(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: bl __truncsfhf2
; CHECK-NEXT: nop
; CHECK-NEXT: clrldi r3, r3, 48
@@ -53,8 +53,6 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
; FAST-NEXT: mflr r0
; FAST-NEXT: stdu r1, -32(r1)
; FAST-NEXT: std r0, 48(r1)
-; FAST-NEXT: .cfi_def_cfa_offset 32
-; FAST-NEXT: .cfi_offset lr, 16
; FAST-NEXT: bl __truncsfhf2
; FAST-NEXT: nop
; FAST-NEXT: clrldi r3, r3, 48
@@ -71,16 +69,12 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
}
declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>)
-define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
+define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind {
; BE-LABEL: llrint_v1i64_v2f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -160(r1)
; BE-NEXT: std r0, 176(r1)
-; BE-NEXT: .cfi_def_cfa_offset 160
-; BE-NEXT: .cfi_offset lr, 16
-; BE-NEXT: .cfi_offset r30, -24
-; BE-NEXT: .cfi_offset f31, -8
; BE-NEXT: stfd f31, 152(r1) # 8-byte Folded Spill
; BE-NEXT: fmr f31, f1
; BE-NEXT: fmr f1, f2
@@ -118,17 +112,12 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -96(r1)
-; CHECK-NEXT: std r0, 112(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset r30, -24
-; CHECK-NEXT: .cfi_offset f31, -8
-; CHECK-NEXT: .cfi_offset v31, -48
; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 112(r1)
; CHECK-NEXT: std r30, 72(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f31, 88(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f31, f2
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: bl __truncsfhf2
; CHECK-NEXT: nop
; CHECK-NEXT: fmr f1, f31
@@ -153,7 +142,7 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
; CHECK-NEXT: lfd f31, 88(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r30, 72(r1) # 8-byte Folded Reload
; CHECK-NEXT: xxmrghd v2, vs0, v31
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 96
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -162,10 +151,6 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
; FAST-LABEL: llrint_v1i64_v2f16:
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
-; FAST-NEXT: .cfi_def_cfa_offset 48
-; FAST-NEXT: .cfi_offset lr, 16
-; FAST-NEXT: .cfi_offset f30, -16
-; FAST-NEXT: .cfi_offset f31, -8
; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; FAST-NEXT: stdu r1, -48(r1)
@@ -202,20 +187,12 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
}
declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>)
-define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
+define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind {
; BE-LABEL: llrint_v4i64_v4f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -208(r1)
; BE-NEXT: std r0, 224(r1)
-; BE-NEXT: .cfi_def_cfa_offset 208
-; BE-NEXT: .cfi_offset lr, 16
-; BE-NEXT: .cfi_offset r28, -56
-; BE-NEXT: .cfi_offset r29, -48
-; BE-NEXT: .cfi_offset r30, -40
-; BE-NEXT: .cfi_offset f29, -24
-; BE-NEXT: .cfi_offset f30, -16
-; BE-NEXT: .cfi_offset f31, -8
; BE-NEXT: stfd f29, 184(r1) # 8-byte Folded Spill
; BE-NEXT: fmr f29, f1
; BE-NEXT: fmr f1, f2
@@ -289,18 +266,8 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -144(r1)
-; CHECK-NEXT: std r0, 160(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 144
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset r28, -56
-; CHECK-NEXT: .cfi_offset r29, -48
-; CHECK-NEXT: .cfi_offset r30, -40
-; CHECK-NEXT: .cfi_offset f29, -24
-; CHECK-NEXT: .cfi_offset f30, -16
-; CHECK-NEXT: .cfi_offset f31, -8
-; CHECK-NEXT: .cfi_offset v30, -96
-; CHECK-NEXT: .cfi_offset v31, -80
; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 160(r1)
; CHECK-NEXT: std r28, 88(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r29, 96(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r30, 104(r1) # 8-byte Folded Spill
@@ -308,11 +275,11 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
; CHECK-NEXT: fmr f29, f2
; CHECK-NEXT: stfd f30, 128(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f30, f3
-; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f31, f4
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: bl __truncsfhf2
; CHECK-NEXT: nop
; CHECK-NEXT: fmr f1, f29
@@ -365,11 +332,11 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
; CHECK-NEXT: lfd f29, 120(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r30, 104(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, 96(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
; CHECK-NEXT: ld r28, 88(r1) # 8-byte Folded Reload
; CHECK-NEXT: xxmrghd v3, vs0, v30
-; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 144
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -378,12 +345,6 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
; FAST-LABEL: llrint_v4i64_v4f16:
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
-; FAST-NEXT: .cfi_def_cfa_offset 64
-; FAST-NEXT: .cfi_offset lr, 16
-; FAST-NEXT: .cfi_offset f28, -32
-; FAST-NEXT: .cfi_offset f29, -24
-; FAST-NEXT: .cfi_offset f30, -16
-; FAST-NEXT: .cfi_offset f31, -8
; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
@@ -447,28 +408,12 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
}
declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>)
-define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
+define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind {
; BE-LABEL: llrint_v8i64_v8f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -304(r1)
; BE-NEXT: std r0, 320(r1)
-; BE-NEXT: .cfi_def_cfa_offset 304
-; BE-NEXT: .cfi_offset lr, 16
-; BE-NEXT: .cfi_offset r24, -120
-; BE-NEXT: .cfi_offset r25, -112
-; BE-NEXT: .cfi_offset r26, -104
-; BE-NEXT: .cfi_offset r27, -96
-; BE-NEXT: .cfi_offset r28, -88
-; BE-NEXT: .cfi_offset r29, -80
-; BE-NEXT: .cfi_offset r30, -72
-; BE-NEXT: .cfi_offset f25, -56
-; BE-NEXT: .cfi_offset f26, -48
-; BE-NEXT: .cfi_offset f27, -40
-; BE-NEXT: .cfi_offset f28, -32
-; BE-NEXT: .cfi_offset f29, -24
-; BE-NEXT: .cfi_offset f30, -16
-; BE-NEXT: .cfi_offset f31, -8
; BE-NEXT: stfd f25, 248(r1) # 8-byte Folded Spill
; BE-NEXT: fmr f25, f1
; BE-NEXT: fmr f1, f2
@@ -614,44 +559,24 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -240(r1)
-; CHECK-NEXT: std r0, 256(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 240
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset r24, -120
-; CHECK-NEXT: .cfi_offset r25, -112
-; CHECK-NEXT: .cfi_offset r26, -104
-; CHECK-NEXT: .cfi_offset r27, -96
-; CHECK-NEXT: .cfi_offset r28, -88
-; CHECK-NEXT: .cfi_offset r29, -80
-; CHECK-NEXT: .cfi_offset r30, -72
-; CHECK-NEXT: .cfi_offset f25, -56
-; CHECK-NEXT: .cfi_offset f26, -48
-; CHECK-NEXT: .cfi_offset f27, -40
-; CHECK-NEXT: .cfi_offset f28, -32
-; CHECK-NEXT: .cfi_offset f29, -24
-; CHECK-NEXT: .cfi_offset f30, -16
-; CHECK-NEXT: .cfi_offset f31, -8
-; CHECK-NEXT: .cfi_offset v28, -192
-; CHECK-NEXT: .cfi_offset v29, -176
-; CHECK-NEXT: .cfi_offset v30, -160
-; CHECK-NEXT: .cfi_offset v31, -144
; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 256(r1)
; CHECK-NEXT: std r24, 120(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r25, 128(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r26, 136(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r27, 144(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r28, 152(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r29, 160(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r30, 168(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: std r30, 168(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f25, 184(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f25, f2
; CHECK-NEXT: stfd f26, 192(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f26, f3
; CHECK-NEXT: stfd f27, 200(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f27, f4
-; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 80
; CHECK-NEXT: stfd f28, 208(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f28, f5
@@ -659,11 +584,11 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
; CHECK-NEXT: fmr f29, f6
; CHECK-NEXT: stfd f30, 224(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f30, f7
-; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 96
; CHECK-NEXT: stfd f31, 232(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f31, f8
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: bl __truncsfhf2
; CHECK-NEXT: nop
; CHECK-NEXT: fmr f1, f25
@@ -766,7 +691,7 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
; CHECK-NEXT: vmr v4, v29
; CHECK-NEXT: lfd f30, 224(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f29, 216(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 80
; CHECK-NEXT: lfd f28, 208(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f27, 200(r1) # 8-byte Folded Reload
@@ -774,7 +699,7 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
; CHECK-NEXT: lfd f25, 184(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r30, 168(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, 160(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: ld r28, 152(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r27, 144(r1) # 8-byte Folded Reload
@@ -782,9 +707,9 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
; CHECK-NEXT: ld r26, 136(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r25, 128(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r24, 120(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 240
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -793,16 +718,6 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
; FAST-LABEL: llrint_v8i64_v8f16:
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
-; FAST-NEXT: .cfi_def_cfa_offset 96
-; FAST-NEXT: .cfi_offset lr, 16
-; FAST-NEXT: .cfi_offset f24, -64
-; FAST-NEXT: .cfi_offset f25, -56
-; FAST-NEXT: .cfi_offset f26, -48
-; FAST-NEXT: .cfi_offset f27, -40
-; FAST-NEXT: .cfi_offset f28, -32
-; FAST-NEXT: .cfi_offset f29, -24
-; FAST-NEXT: .cfi_offset f30, -16
-; FAST-NEXT: .cfi_offset f31, -8
; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill
@@ -920,44 +835,12 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
}
declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>)
-define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
+define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind {
; BE-LABEL: llrint_v16i64_v16f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -496(r1)
; BE-NEXT: std r0, 512(r1)
-; BE-NEXT: .cfi_def_cfa_offset 496
-; BE-NEXT: .cfi_offset lr, 16
-; BE-NEXT: .cfi_offset r16, -248
-; BE-NEXT: .cfi_offset r17, -240
-; BE-NEXT: .cfi_offset r18, -232
-; BE-NEXT: .cfi_offset r19, -224
-; BE-NEXT: .cfi_offset r20, -216
-; BE-NEXT: .cfi_offset r21, -208
-; BE-NEXT: .cfi_offset r22, -200
-; BE-NEXT: .cfi_offset r23, -192
-; BE-NEXT: .cfi_offset r24, -184
-; BE-NEXT: .cfi_offset r25, -176
-; BE-NEXT: .cfi_offset r26, -168
-; BE-NEXT: .cfi_offset r27, -160
-; BE-NEXT: .cfi_offset r28, -152
-; BE-NEXT: .cfi_offset r29, -144
-; BE-NEXT: .cfi_offset r30, -136
-; BE-NEXT: .cfi_offset f17, -120
-; BE-NEXT: .cfi_offset f18, -112
-; BE-NEXT: .cfi_offset f19, -104
-; BE-NEXT: .cfi_offset f20, -96
-; BE-NEXT: .cfi_offset f21, -88
-; BE-NEXT: .cfi_offset f22, -80
-; BE-NEXT: .cfi_offset f23, -72
-; BE-NEXT: .cfi_offset f24, -64
-; BE-NEXT: .cfi_offset f25, -56
-; BE-NEXT: .cfi_offset f26, -48
-; BE-NEXT: .cfi_offset f27, -40
-; BE-NEXT: .cfi_offset f28, -32
-; BE-NEXT: .cfi_offset f29, -24
-; BE-NEXT: .cfi_offset f30, -16
-; BE-NEXT: .cfi_offset f31, -8
; BE-NEXT: stfd f20, 400(r1) # 8-byte Folded Spill
; BE-NEXT: fmr f20, f1
; BE-NEXT: fmr f1, f2
@@ -1244,105 +1127,65 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -432(r1)
-; CHECK-NEXT: std r0, 448(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 432
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset r16, -248
-; CHECK-NEXT: .cfi_offset r17, -240
-; CHECK-NEXT: .cfi_offset r18, -232
-; CHECK-NEXT: .cfi_offset r19, -224
-; CHECK-NEXT: .cfi_offset r20, -216
-; CHECK-NEXT: .cfi_offset r21, -208
-; CHECK-NEXT: .cfi_offset r22, -200
-; CHECK-NEXT: .cfi_offset r23, -192
-; CHECK-NEXT: .cfi_offset r24, -184
-; CHECK-NEXT: .cfi_offset r25, -176
-; CHECK-NEXT: .cfi_offset r26, -168
-; CHECK-NEXT: .cfi_offset r27, -160
-; CHECK-NEXT: .cfi_offset r28, -152
-; CHECK-NEXT: .cfi_offset r29, -144
-; CHECK-NEXT: .cfi_offset r30, -136
-; CHECK-NEXT: .cfi_offset f17, -120
-; CHECK-NEXT: .cfi_offset f18, -112
-; CHECK-NEXT: .cfi_offset f19, -104
-; CHECK-NEXT: .cfi_offset f20, -96
-; CHECK-NEXT: .cfi_offset f21, -88
-; CHECK-NEXT: .cfi_offset f22, -80
-; CHECK-NEXT: .cfi_offset f23, -72
-; CHECK-NEXT: .cfi_offset f24, -64
-; CHECK-NEXT: .cfi_offset f25, -56
-; CHECK-NEXT: .cfi_offset f26, -48
-; CHECK-NEXT: .cfi_offset f27, -40
-; CHECK-NEXT: .cfi_offset f28, -32
-; CHECK-NEXT: .cfi_offset f29, -24
-; CHECK-NEXT: .cfi_offset f30, -16
-; CHECK-NEXT: .cfi_offset f31, -8
-; CHECK-NEXT: .cfi_offset v24, -384
-; CHECK-NEXT: .cfi_offset v25, -368
-; CHECK-NEXT: .cfi_offset v26, -352
-; CHECK-NEXT: .cfi_offset v27, -336
-; CHECK-NEXT: .cfi_offset v28, -320
-; CHECK-NEXT: .cfi_offset v29, -304
-; CHECK-NEXT: .cfi_offset v30, -288
-; CHECK-NEXT: .cfi_offset v31, -272
; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 448(r1)
; CHECK-NEXT: std r16, 184(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r17, 192(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r18, 200(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r19, 208(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r20, 216(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r21, 224(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r22, 232(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: std r22, 232(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r23, 240(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r24, 248(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r25, 256(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r26, 264(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r27, 272(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r28, 280(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 80
+; CHECK-NEXT: std r28, 280(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r29, 288(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r30, 296(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f17, 312(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f18, 320(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f19, 328(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 96
; CHECK-NEXT: stfd f20, 336(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f20, f2
-; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 96
; CHECK-NEXT: stfd f21, 344(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f21, f3
; CHECK-NEXT: stfd f22, 352(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f22, f4
+; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 112
; CHECK-NEXT: stfd f23, 360(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f23, f5
-; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 112
; CHECK-NEXT: stfd f24, 368(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f24, f6
; CHECK-NEXT: stfd f25, 376(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f25, f7
+; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 128
; CHECK-NEXT: stfd f26, 384(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f26, f8
-; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 128
; CHECK-NEXT: stfd f27, 392(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f27, f9
; CHECK-NEXT: stfd f28, 400(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f28, f10
+; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 144
; CHECK-NEXT: stfd f29, 408(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f29, f11
-; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 144
; CHECK-NEXT: stfd f30, 416(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f30, f12
; CHECK-NEXT: stfd f31, 424(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f31, f13
-; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 160
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: bl __truncsfhf2
; CHECK-NEXT: nop
; CHECK-NEXT: fmr f1, f20
@@ -1545,7 +1388,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
; CHECK-NEXT: vmr v4, v29
; CHECK-NEXT: lfd f30, 416(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f29, 408(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 144
; CHECK-NEXT: vmr v5, v28
; CHECK-NEXT: vmr v6, v27
@@ -1553,7 +1396,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
; CHECK-NEXT: vmr v8, v25
; CHECK-NEXT: lfd f28, 400(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f27, 392(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 128
; CHECK-NEXT: lfd f26, 384(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f25, 376(r1) # 8-byte Folded Reload
@@ -1561,7 +1404,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
; CHECK-NEXT: lfd f24, 368(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f23, 360(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f22, 352(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 112
; CHECK-NEXT: lfd f21, 344(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r30, 296(r1) # 8-byte Folded Reload
@@ -1569,7 +1412,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
; CHECK-NEXT: lfd f19, 328(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, 288(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r28, 280(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 96
; CHECK-NEXT: lfd f18, 320(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r27, 272(r1) # 8-byte Folded Reload
@@ -1577,7 +1420,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
; CHECK-NEXT: ld r26, 264(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r25, 256(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r24, 248(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 80
; CHECK-NEXT: ld r23, 240(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r22, 232(r1) # 8-byte Folded Reload
@@ -1585,13 +1428,13 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
; CHECK-NEXT: ld r20, 216(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r19, 208(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r18, 200(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: ld r17, 192(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r16, 184(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 432
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -1600,24 +1443,6 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
; FAST-LABEL: llrint_v16i64_v16f16:
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
-; FAST-NEXT: .cfi_def_cfa_offset 160
-; FAST-NEXT: .cfi_offset lr, 16
-; FAST-NEXT: .cfi_offset f16, -128
-; FAST-NEXT: .cfi_offset f17, -120
-; FAST-NEXT: .cfi_offset f18, -112
-; FAST-NEXT: .cfi_offset f19, -104
-; FAST-NEXT: .cfi_offset f20, -96
-; FAST-NEXT: .cfi_offset f21, -88
-; FAST-NEXT: .cfi_offset f22, -80
-; FAST-NEXT: .cfi_offset f23, -72
-; FAST-NEXT: .cfi_offset f24, -64
-; FAST-NEXT: .cfi_offset f25, -56
-; FAST-NEXT: .cfi_offset f26, -48
-; FAST-NEXT: .cfi_offset f27, -40
-; FAST-NEXT: .cfi_offset f28, -32
-; FAST-NEXT: .cfi_offset f29, -24
-; FAST-NEXT: .cfi_offset f30, -16
-; FAST-NEXT: .cfi_offset f31, -8
; FAST-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill
@@ -1841,50 +1666,12 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
}
declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>)
-define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
+define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
; BE-LABEL: llrint_v32i64_v32f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -864(r1)
; BE-NEXT: std r0, 880(r1)
-; BE-NEXT: .cfi_def_cfa_offset 864
-; BE-NEXT: .cfi_offset lr, 16
-; BE-NEXT: .cfi_offset r14, -288
-; BE-NEXT: .cfi_offset r15, -280
-; BE-NEXT: .cfi_offset r16, -272
-; BE-NEXT: .cfi_offset r17, -264
-; BE-NEXT: .cfi_offset r18, -256
-; BE-NEXT: .cfi_offset r19, -248
-; BE-NEXT: .cfi_offset r20, -240
-; BE-NEXT: .cfi_offset r21, -232
-; BE-NEXT: .cfi_offset r22, -224
-; BE-NEXT: .cfi_offset r23, -216
-; BE-NEXT: .cfi_offset r24, -208
-; BE-NEXT: .cfi_offset r25, -200
-; BE-NEXT: .cfi_offset r26, -192
-; BE-NEXT: .cfi_offset r27, -184
-; BE-NEXT: .cfi_offset r28, -176
-; BE-NEXT: .cfi_offset r29, -168
-; BE-NEXT: .cfi_offset r30, -160
-; BE-NEXT: .cfi_offset r31, -152
-; BE-NEXT: .cfi_offset f14, -144
-; BE-NEXT: .cfi_offset f15, -136
-; BE-NEXT: .cfi_offset f16, -128
-; BE-NEXT: .cfi_offset f17, -120
-; BE-NEXT: .cfi_offset f18, -112
-; BE-NEXT: .cfi_offset f19, -104
-; BE-NEXT: .cfi_offset f20, -96
-; BE-NEXT: .cfi_offset f21, -88
-; BE-NEXT: .cfi_offset f22, -80
-; BE-NEXT: .cfi_offset f23, -72
-; BE-NEXT: .cfi_offset f24, -64
-; BE-NEXT: .cfi_offset f25, -56
-; BE-NEXT: .cfi_offset f26, -48
-; BE-NEXT: .cfi_offset f27, -40
-; BE-NEXT: .cfi_offset f28, -32
-; BE-NEXT: .cfi_offset f29, -24
-; BE-NEXT: .cfi_offset f30, -16
-; BE-NEXT: .cfi_offset f31, -8
; BE-NEXT: stfd f20, 768(r1) # 8-byte Folded Spill
; BE-NEXT: fmr f20, f1
; BE-NEXT: fmr f1, f2
@@ -1924,6 +1711,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
; BE-NEXT: stfd f30, 848(r1) # 8-byte Folded Spill
; BE-NEXT: stfd f31, 856(r1) # 8-byte Folded Spill
; BE-NEXT: fmr f31, f13
+; BE-NEXT: mr r30, r3
; BE-NEXT: fmr f29, f12
; BE-NEXT: fmr f30, f11
; BE-NEXT: fmr f28, f10
@@ -1934,7 +1722,6 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
; BE-NEXT: fmr f23, f5
; BE-NEXT: fmr f22, f4
; BE-NEXT: fmr f21, f3
-; BE-NEXT: mr r30, r3
; BE-NEXT: bl __truncsfhf2
; BE-NEXT: nop
; BE-NEXT: fmr f1, f20
@@ -2437,98 +2224,48 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -688(r1)
-; CHECK-NEXT: std r0, 704(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 688
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset r14, -288
-; CHECK-NEXT: .cfi_offset r15, -280
-; CHECK-NEXT: .cfi_offset r16, -272
-; CHECK-NEXT: .cfi_offset r17, -264
-; CHECK-NEXT: .cfi_offset r18, -256
-; CHECK-NEXT: .cfi_offset r19, -248
-; CHECK-NEXT: .cfi_offset r20, -240
-; CHECK-NEXT: .cfi_offset r21, -232
-; CHECK-NEXT: .cfi_offset r22, -224
-; CHECK-NEXT: .cfi_offset r23, -216
-; CHECK-NEXT: .cfi_offset r24, -208
-; CHECK-NEXT: .cfi_offset r25, -200
-; CHECK-NEXT: .cfi_offset r26, -192
-; CHECK-NEXT: .cfi_offset r27, -184
-; CHECK-NEXT: .cfi_offset r28, -176
-; CHECK-NEXT: .cfi_offset r29, -168
-; CHECK-NEXT: .cfi_offset r30, -160
-; CHECK-NEXT: .cfi_offset r31, -152
-; CHECK-NEXT: .cfi_offset f14, -144
-; CHECK-NEXT: .cfi_offset f15, -136
-; CHECK-NEXT: .cfi_offset f16, -128
-; CHECK-NEXT: .cfi_offset f17, -120
-; CHECK-NEXT: .cfi_offset f18, -112
-; CHECK-NEXT: .cfi_offset f19, -104
-; CHECK-NEXT: .cfi_offset f20, -96
-; CHECK-NEXT: .cfi_offset f21, -88
-; CHECK-NEXT: .cfi_offset f22, -80
-; CHECK-NEXT: .cfi_offset f23, -72
-; CHECK-NEXT: .cfi_offset f24, -64
-; CHECK-NEXT: .cfi_offset f25, -56
-; CHECK-NEXT: .cfi_offset f26, -48
-; CHECK-NEXT: .cfi_offset f27, -40
-; CHECK-NEXT: .cfi_offset f28, -32
-; CHECK-NEXT: .cfi_offset f29, -24
-; CHECK-NEXT: .cfi_offset f30, -16
-; CHECK-NEXT: .cfi_offset f31, -8
-; CHECK-NEXT: .cfi_offset v20, -480
-; CHECK-NEXT: .cfi_offset v21, -464
-; CHECK-NEXT: .cfi_offset v22, -448
-; CHECK-NEXT: .cfi_offset v23, -432
-; CHECK-NEXT: .cfi_offset v24, -416
-; CHECK-NEXT: .cfi_offset v25, -400
-; CHECK-NEXT: .cfi_offset v26, -384
-; CHECK-NEXT: .cfi_offset v27, -368
-; CHECK-NEXT: .cfi_offset v28, -352
-; CHECK-NEXT: .cfi_offset v29, -336
-; CHECK-NEXT: .cfi_offset v30, -320
-; CHECK-NEXT: .cfi_offset v31, -304
; CHECK-NEXT: li r4, 208
+; CHECK-NEXT: std r0, 704(r1)
; CHECK-NEXT: std r14, 400(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r15, 408(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r16, 416(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r17, 424(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r18, 432(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r19, 440(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r20, 448(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stvx v20, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 224
+; CHECK-NEXT: std r20, 448(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r21, 456(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r22, 464(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r23, 472(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r24, 480(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r25, 488(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r26, 496(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 240
+; CHECK-NEXT: std r26, 496(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r27, 504(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r28, 512(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r29, 520(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r30, 528(r1) # 8-byte Folded Spill
; CHECK-NEXT: mr r30, r3
-; CHECK-NEXT: std r31, 536(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 256
+; CHECK-NEXT: std r31, 536(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f14, 544(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f15, 552(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f16, 560(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f17, 568(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f18, 576(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f19, 584(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 272
+; CHECK-NEXT: stfd f19, 584(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f20, 592(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f20, f2
; CHECK-NEXT: stfd f21, 600(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f21, f3
; CHECK-NEXT: stfd f22, 608(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f22, f4
-; CHECK-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 288
; CHECK-NEXT: stfd f23, 616(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f23, f5
@@ -2536,7 +2273,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
; CHECK-NEXT: fmr f24, f6
; CHECK-NEXT: stfd f25, 632(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f25, f7
-; CHECK-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 304
; CHECK-NEXT: stfd f26, 640(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f26, f8
@@ -2544,7 +2281,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
; CHECK-NEXT: fmr f27, f9
; CHECK-NEXT: stfd f28, 656(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f28, f10
-; CHECK-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 320
; CHECK-NEXT: stfd f29, 664(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f29, f11
@@ -2552,15 +2289,15 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
; CHECK-NEXT: fmr f30, f12
; CHECK-NEXT: stfd f31, 680(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f31, f13
-; CHECK-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 336
-; CHECK-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 352
-; CHECK-NEXT: stvx v29, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 368
-; CHECK-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 384
-; CHECK-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: bl __truncsfhf2
; CHECK-NEXT: nop
; CHECK-NEXT: fmr f1, f20
@@ -3039,7 +2776,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
; CHECK-NEXT: li r3, 384
; CHECK-NEXT: xxswapd vs4, vs4
; CHECK-NEXT: stxvd2x vs4, 0, r30
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 368
; CHECK-NEXT: lfd f31, 680(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f30, 672(r1) # 8-byte Folded Reload
@@ -3057,7 +2794,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
; CHECK-NEXT: lfd f18, 576(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f17, 568(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f16, 560(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 352
; CHECK-NEXT: lfd f15, 552(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f14, 544(r1) # 8-byte Folded Reload
@@ -3065,7 +2802,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
; CHECK-NEXT: ld r30, 528(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, 520(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r28, 512(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 336
; CHECK-NEXT: ld r27, 504(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r26, 496(r1) # 8-byte Folded Reload
@@ -3073,7 +2810,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
; CHECK-NEXT: ld r24, 480(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r23, 472(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r22, 464(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 320
; CHECK-NEXT: ld r21, 456(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r20, 448(r1) # 8-byte Folded Reload
@@ -3081,23 +2818,23 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
; CHECK-NEXT: ld r18, 432(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r17, 424(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r16, 416(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 304
; CHECK-NEXT: ld r15, 408(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r14, 400(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 288
-; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 272
-; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 256
-; CHECK-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 240
-; CHECK-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 224
-; CHECK-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 208
-; CHECK-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 688
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -3107,95 +2844,62 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
; FAST-NEXT: stdu r1, -480(r1)
-; FAST-NEXT: std r0, 496(r1)
-; FAST-NEXT: .cfi_def_cfa_offset 480
-; FAST-NEXT: .cfi_offset lr, 16
-; FAST-NEXT: .cfi_offset r30, -160
-; FAST-NEXT: .cfi_offset f14, -144
-; FAST-NEXT: .cfi_offset f15, -136
-; FAST-NEXT: .cfi_offset f16, -128
-; FAST-NEXT: .cfi_offset f17, -120
-; FAST-NEXT: .cfi_offset f18, -112
-; FAST-NEXT: .cfi_offset f19, -104
-; FAST-NEXT: .cfi_offset f20, -96
-; FAST-NEXT: .cfi_offset f21, -88
-; FAST-NEXT: .cfi_offset f22, -80
-; FAST-NEXT: .cfi_offset f23, -72
-; FAST-NEXT: .cfi_offset f24, -64
-; FAST-NEXT: .cfi_offset f25, -56
-; FAST-NEXT: .cfi_offset f26, -48
-; FAST-NEXT: .cfi_offset f27, -40
-; FAST-NEXT: .cfi_offset f28, -32
-; FAST-NEXT: .cfi_offset f29, -24
-; FAST-NEXT: .cfi_offset f30, -16
-; FAST-NEXT: .cfi_offset f31, -8
-; FAST-NEXT: .cfi_offset v20, -352
-; FAST-NEXT: .cfi_offset v21, -336
-; FAST-NEXT: .cfi_offset v22, -320
-; FAST-NEXT: .cfi_offset v23, -304
-; FAST-NEXT: .cfi_offset v24, -288
-; FAST-NEXT: .cfi_offset v25, -272
-; FAST-NEXT: .cfi_offset v26, -256
-; FAST-NEXT: .cfi_offset v27, -240
-; FAST-NEXT: .cfi_offset v28, -224
-; FAST-NEXT: .cfi_offset v29, -208
-; FAST-NEXT: .cfi_offset v30, -192
-; FAST-NEXT: .cfi_offset v31, -176
; FAST-NEXT: li r4, 128
+; FAST-NEXT: std r0, 496(r1)
; FAST-NEXT: std r30, 320(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r30, r3
; FAST-NEXT: stfd f14, 336(r1) # 8-byte Folded Spill
-; FAST-NEXT: fmr f14, f5
; FAST-NEXT: stfd f15, 344(r1) # 8-byte Folded Spill
+; FAST-NEXT: fmr f14, f5
; FAST-NEXT: stfd f16, 352(r1) # 8-byte Folded Spill
-; FAST-NEXT: fmr f16, f4
-; FAST-NEXT: mr r30, r3
-; FAST-NEXT: stvx v20, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 144
+; FAST-NEXT: fmr f16, f4
; FAST-NEXT: stfd f17, 360(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f18, 368(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f19, 376(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f20, 384(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f21, 392(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f22, 400(r1) # 8-byte Folded Spill
-; FAST-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 160
+; FAST-NEXT: stfd f22, 400(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f23, 408(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f24, 416(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f25, 424(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f26, 432(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f27, 440(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill
-; FAST-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 176
; FAST-NEXT: xxlor v22, f3, f3
+; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f29, 456(r1) # 8-byte Folded Spill
; FAST-NEXT: fmr f29, f9
; FAST-NEXT: stfd f30, 464(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f31, 472(r1) # 8-byte Folded Spill
-; FAST-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 192
; FAST-NEXT: xxlor v23, f2, f2
-; FAST-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 208
-; FAST-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 224
; FAST-NEXT: xxlor v25, f13, f13
-; FAST-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 240
; FAST-NEXT: xxlor v26, f12, f12
-; FAST-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 256
; FAST-NEXT: xxlor v27, f11, f11
-; FAST-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 272
; FAST-NEXT: xxlor v28, f10, f10
-; FAST-NEXT: stvx v29, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 288
; FAST-NEXT: xxlor v29, f8, f8
-; FAST-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 304
; FAST-NEXT: xxlor v30, f7, f7
-; FAST-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 44
; FAST-NEXT: xxlor v31, f6, f6
; FAST-NEXT: stxsspx f1, r1, r4 # 4-byte Folded Spill
@@ -3624,30 +3328,30 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
; FAST-NEXT: lfd f16, 352(r1) # 8-byte Folded Reload
; FAST-NEXT: lfd f15, 344(r1) # 8-byte Folded Reload
; FAST-NEXT: lfd f14, 336(r1) # 8-byte Folded Reload
-; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 288
; FAST-NEXT: ld r30, 320(r1) # 8-byte Folded Reload
-; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 272
-; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 256
-; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 240
-; FAST-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 224
-; FAST-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 208
-; FAST-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 192
-; FAST-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 176
-; FAST-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 160
-; FAST-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 144
-; FAST-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 128
-; FAST-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: addi r1, r1, 480
; FAST-NEXT: ld r0, 16(r1)
; FAST-NEXT: mtlr r0
@@ -3657,14 +3361,12 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
}
declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>)
-define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
+define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) nounwind {
; BE-LABEL: llrint_v1i64_v1f32:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -112(r1)
; BE-NEXT: std r0, 128(r1)
-; BE-NEXT: .cfi_def_cfa_offset 112
-; BE-NEXT: .cfi_offset lr, 16
; BE-NEXT: bl llrintf
; BE-NEXT: nop
; BE-NEXT: addi r1, r1, 112
@@ -3677,8 +3379,6 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: std r0, 48(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
@@ -3696,15 +3396,13 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
}
declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
-define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
+define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) nounwind {
; BE-LABEL: llrint_v2i64_v2f32:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -144(r1)
-; BE-NEXT: std r0, 160(r1)
-; BE-NEXT: .cfi_def_cfa_offset 144
-; BE-NEXT: .cfi_offset lr, 16
; BE-NEXT: addi r3, r1, 112
+; BE-NEXT: std r0, 160(r1)
; BE-NEXT: stxvw4x v2, 0, r3
; BE-NEXT: lfs f1, 116(r1)
; BE-NEXT: bl llrintf
@@ -3725,14 +3423,11 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -64(r1)
-; CHECK-NEXT: std r0, 80(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset v31, -16
; CHECK-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 80(r1)
; CHECK-NEXT: xscvspdpn f1, vs0
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v2
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
@@ -3744,7 +3439,7 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: li r3, 48
; CHECK-NEXT: xxmrghd v2, vs0, v31
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -3769,15 +3464,13 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
}
declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
-define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
+define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) nounwind {
; BE-LABEL: llrint_v4i64_v4f32:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -160(r1)
-; BE-NEXT: std r0, 176(r1)
-; BE-NEXT: .cfi_def_cfa_offset 160
-; BE-NEXT: .cfi_offset lr, 16
; BE-NEXT: addi r3, r1, 112
+; BE-NEXT: std r0, 176(r1)
; BE-NEXT: stxvw4x v2, 0, r3
; BE-NEXT: lfs f1, 116(r1)
; BE-NEXT: bl llrintf
@@ -3808,17 +3501,13 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -80(r1)
-; CHECK-NEXT: std r0, 96(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset v30, -32
-; CHECK-NEXT: .cfi_offset v31, -16
; CHECK-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 96(r1)
; CHECK-NEXT: xscvspdpn f1, vs0
-; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v2
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
@@ -3841,9 +3530,9 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: vmr v2, v30
; CHECK-NEXT: xxmrghd v3, v31, vs0
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 80
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -3879,15 +3568,13 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
}
declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
-define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
+define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) nounwind {
; BE-LABEL: llrint_v8i64_v8f32:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -208(r1)
-; BE-NEXT: std r0, 224(r1)
-; BE-NEXT: .cfi_def_cfa_offset 208
-; BE-NEXT: .cfi_offset lr, 16
; BE-NEXT: addi r3, r1, 112
+; BE-NEXT: std r0, 224(r1)
; BE-NEXT: stxvw4x v2, 0, r3
; BE-NEXT: addi r3, r1, 128
; BE-NEXT: stxvw4x v3, 0, r3
@@ -3940,24 +3627,18 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -112(r1)
-; CHECK-NEXT: std r0, 128(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 112
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset v28, -64
-; CHECK-NEXT: .cfi_offset v29, -48
-; CHECK-NEXT: .cfi_offset v30, -32
-; CHECK-NEXT: .cfi_offset v31, -16
; CHECK-NEXT: li r3, 48
; CHECK-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: std r0, 128(r1)
+; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: xscvspdpn f1, vs0
-; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 80
-; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 96
; CHECK-NEXT: vmr v30, v2
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v3
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
@@ -4003,13 +3684,13 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
; CHECK-NEXT: vmr v2, v29
; CHECK-NEXT: vmr v4, v28
; CHECK-NEXT: xxmrghd v5, v31, vs0
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 80
-; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 112
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -4067,15 +3748,13 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
}
declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
-define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
+define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) nounwind {
; BE-LABEL: llrint_v16i64_v16f32:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -304(r1)
-; BE-NEXT: std r0, 320(r1)
-; BE-NEXT: .cfi_def_cfa_offset 304
-; BE-NEXT: .cfi_offset lr, 16
; BE-NEXT: addi r3, r1, 112
+; BE-NEXT: std r0, 320(r1)
; BE-NEXT: stxvw4x v2, 0, r3
; BE-NEXT: addi r3, r1, 128
; BE-NEXT: stxvw4x v3, 0, r3
@@ -4172,38 +3851,28 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -176(r1)
-; CHECK-NEXT: std r0, 192(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 176
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset v24, -128
-; CHECK-NEXT: .cfi_offset v25, -112
-; CHECK-NEXT: .cfi_offset v26, -96
-; CHECK-NEXT: .cfi_offset v27, -80
-; CHECK-NEXT: .cfi_offset v28, -64
-; CHECK-NEXT: .cfi_offset v29, -48
-; CHECK-NEXT: .cfi_offset v30, -32
-; CHECK-NEXT: .cfi_offset v31, -16
; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 192(r1)
; CHECK-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: xscvspdpn f1, vs0
-; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 80
-; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 96
; CHECK-NEXT: vmr v26, v3
-; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 112
-; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 128
; CHECK-NEXT: vmr v28, v4
-; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 144
; CHECK-NEXT: vmr v29, v2
-; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 160
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v5
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
@@ -4295,21 +3964,21 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
; CHECK-NEXT: vmr v6, v25
; CHECK-NEXT: vmr v8, v24
; CHECK-NEXT: xxmrghd v9, v31, vs0
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 144
-; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 128
-; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 112
-; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 96
-; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 80
-; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 176
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -4411,14 +4080,12 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
}
declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
-define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
+define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) nounwind {
; BE-LABEL: llrint_v1i64_v1f64:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -112(r1)
; BE-NEXT: std r0, 128(r1)
-; BE-NEXT: .cfi_def_cfa_offset 112
-; BE-NEXT: .cfi_offset lr, 16
; BE-NEXT: bl llrint
; BE-NEXT: nop
; BE-NEXT: addi r1, r1, 112
@@ -4431,8 +4098,6 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: std r0, 48(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: bl llrint
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
@@ -4450,16 +4115,13 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
}
declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>)
-define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
+define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) nounwind {
; BE-LABEL: llrint_v2i64_v2f64:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -160(r1)
-; BE-NEXT: std r0, 176(r1)
-; BE-NEXT: .cfi_def_cfa_offset 160
-; BE-NEXT: .cfi_offset lr, 16
-; BE-NEXT: .cfi_offset v31, -16
; BE-NEXT: li r3, 144
+; BE-NEXT: std r0, 176(r1)
; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; BE-NEXT: vmr v31, v2
; BE-NEXT: xxlor f1, v31, v31
@@ -4483,12 +4145,9 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -64(r1)
-; CHECK-NEXT: std r0, 80(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset v31, -16
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: std r0, 80(r1)
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v2
; CHECK-NEXT: xxlor f1, v31, v31
; CHECK-NEXT: bl llrint
@@ -4500,7 +4159,7 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: li r3, 48
; CHECK-NEXT: xxmrghd v2, v31, vs0
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -4523,17 +4182,13 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
}
declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)
-define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
+define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) nounwind {
; BE-LABEL: llrint_v4i64_v4f64:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -192(r1)
-; BE-NEXT: std r0, 208(r1)
-; BE-NEXT: .cfi_def_cfa_offset 192
-; BE-NEXT: .cfi_offset lr, 16
-; BE-NEXT: .cfi_offset v30, -32
-; BE-NEXT: .cfi_offset v31, -16
; BE-NEXT: li r3, 160
+; BE-NEXT: std r0, 208(r1)
; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; BE-NEXT: vmr v30, v2
; BE-NEXT: li r3, 176
@@ -4572,17 +4227,13 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -80(r1)
-; CHECK-NEXT: std r0, 96(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset v30, -32
-; CHECK-NEXT: .cfi_offset v31, -16
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: std r0, 96(r1)
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: vmr v30, v2
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: xxlor f1, v30, v30
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v3
; CHECK-NEXT: bl llrint
; CHECK-NEXT: nop
@@ -4603,9 +4254,9 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: vmr v2, v30
; CHECK-NEXT: xxmrghd v3, v31, vs0
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 80
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -4637,25 +4288,19 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
}
declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
-define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
+define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) nounwind {
; BE-LABEL: llrint_v8i64_v8f64:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -256(r1)
-; BE-NEXT: std r0, 272(r1)
-; BE-NEXT: .cfi_def_cfa_offset 256
-; BE-NEXT: .cfi_offset lr, 16
-; BE-NEXT: .cfi_offset v28, -64
-; BE-NEXT: .cfi_offset v29, -48
-; BE-NEXT: .cfi_offset v30, -32
-; BE-NEXT: .cfi_offset v31, -16
; BE-NEXT: li r3, 192
+; BE-NEXT: std r0, 272(r1)
; BE-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
; BE-NEXT: li r3, 208
; BE-NEXT: vmr v28, v2
-; BE-NEXT: xxlor f1, v28, v28
; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
; BE-NEXT: li r3, 224
+; BE-NEXT: xxlor f1, v28, v28
; BE-NEXT: vmr v29, v3
; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; BE-NEXT: li r3, 240
@@ -4718,25 +4363,19 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -112(r1)
-; CHECK-NEXT: std r0, 128(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 112
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset v28, -64
-; CHECK-NEXT: .cfi_offset v29, -48
-; CHECK-NEXT: .cfi_offset v30, -32
-; CHECK-NEXT: .cfi_offset v31, -16
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: std r0, 128(r1)
+; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: vmr v28, v2
-; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 80
; CHECK-NEXT: xxlor f1, v28, v28
; CHECK-NEXT: vmr v29, v3
-; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 96
; CHECK-NEXT: vmr v30, v4
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v5
; CHECK-NEXT: bl llrint
; CHECK-NEXT: nop
@@ -4777,13 +4416,13 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
; CHECK-NEXT: vmr v3, v29
; CHECK-NEXT: vmr v2, v28
; CHECK-NEXT: xxmrghd v5, v31, vs0
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 80
-; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 112
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -4832,3 +4471,536 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
ret <8 x i64> %a
}
declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
+
+define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) nounwind {
+; BE-LABEL: llrint_v1i64_v1f128:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: bl llrintf128
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: llrint_v1i64_v1f128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -32(r1)
+; CHECK-NEXT: std r0, 48(r1)
+; CHECK-NEXT: bl llrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: addi r1, r1, 32
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+;
+; FAST-LABEL: llrint_v1i64_v1f128:
+; FAST: # %bb.0:
+; FAST-NEXT: mflr r0
+; FAST-NEXT: stdu r1, -32(r1)
+; FAST-NEXT: std r0, 48(r1)
+; FAST-NEXT: bl llrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: addi r1, r1, 32
+; FAST-NEXT: ld r0, 16(r1)
+; FAST-NEXT: mtlr r0
+; FAST-NEXT: blr
+ %a = call <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128> %x)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128>)
+
+define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) nounwind {
+; BE-LABEL: llrint_v2i64_v2f128:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -160(r1)
+; BE-NEXT: li r3, 144
+; BE-NEXT: std r0, 176(r1)
+; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: vmr v31, v2
+; BE-NEXT: vmr v2, v3
+; BE-NEXT: bl llrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v31
+; BE-NEXT: std r3, 136(r1)
+; BE-NEXT: bl llrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 128(r1)
+; BE-NEXT: addi r3, r1, 128
+; BE-NEXT: lxvd2x v2, 0, r3
+; BE-NEXT: li r3, 144
+; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: addi r1, r1, 160
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: llrint_v2i64_v2f128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -80(r1)
+; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 96(r1)
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: vmr v31, v3
+; CHECK-NEXT: bl llrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v31
+; CHECK-NEXT: mtvsrd v30, r3
+; CHECK-NEXT: bl llrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: xxmrghd v2, vs0, v30
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: addi r1, r1, 80
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+;
+; FAST-LABEL: llrint_v2i64_v2f128:
+; FAST: # %bb.0:
+; FAST-NEXT: mflr r0
+; FAST-NEXT: stdu r1, -80(r1)
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: std r0, 96(r1)
+; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: vmr v31, v3
+; FAST-NEXT: bl llrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v31
+; FAST-NEXT: mtvsrd v30, r3
+; FAST-NEXT: bl llrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: xxmrghd v2, vs0, v30
+; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: addi r1, r1, 80
+; FAST-NEXT: ld r0, 16(r1)
+; FAST-NEXT: mtlr r0
+; FAST-NEXT: blr
+ %a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>)
+
+define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) nounwind {
+; BE-LABEL: llrint_v4i64_v4f128:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -208(r1)
+; BE-NEXT: li r3, 160
+; BE-NEXT: std r0, 224(r1)
+; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 176
+; BE-NEXT: vmr v29, v2
+; BE-NEXT: vmr v2, v3
+; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 192
+; BE-NEXT: vmr v30, v4
+; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: vmr v31, v5
+; BE-NEXT: bl llrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v29
+; BE-NEXT: std r3, 136(r1)
+; BE-NEXT: bl llrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v31
+; BE-NEXT: std r3, 128(r1)
+; BE-NEXT: bl llrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v30
+; BE-NEXT: std r3, 152(r1)
+; BE-NEXT: bl llrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 144(r1)
+; BE-NEXT: addi r3, r1, 128
+; BE-NEXT: lxvd2x v2, 0, r3
+; BE-NEXT: addi r3, r1, 144
+; BE-NEXT: lxvd2x v3, 0, r3
+; BE-NEXT: li r3, 192
+; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 176
+; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 160
+; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: addi r1, r1, 208
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: llrint_v4i64_v4f128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -112(r1)
+; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 128(r1)
+; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 80
+; CHECK-NEXT: vmr v29, v3
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 96
+; CHECK-NEXT: vmr v30, v4
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: vmr v31, v5
+; CHECK-NEXT: bl llrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v29
+; CHECK-NEXT: mtvsrd v28, r3
+; CHECK-NEXT: bl llrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: vmr v2, v30
+; CHECK-NEXT: xxmrghd v29, vs0, v28
+; CHECK-NEXT: bl llrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v31
+; CHECK-NEXT: mtvsrd v30, r3
+; CHECK-NEXT: bl llrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 96
+; CHECK-NEXT: vmr v2, v29
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 80
+; CHECK-NEXT: xxmrghd v3, vs0, v30
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: addi r1, r1, 112
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+;
+; FAST-LABEL: llrint_v4i64_v4f128:
+; FAST: # %bb.0:
+; FAST-NEXT: mflr r0
+; FAST-NEXT: stdu r1, -112(r1)
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: std r0, 128(r1)
+; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 80
+; FAST-NEXT: vmr v29, v3
+; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 96
+; FAST-NEXT: vmr v30, v4
+; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: vmr v31, v5
+; FAST-NEXT: bl llrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v29
+; FAST-NEXT: mtvsrd v28, r3
+; FAST-NEXT: bl llrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: vmr v2, v30
+; FAST-NEXT: xxmrghd v29, vs0, v28
+; FAST-NEXT: bl llrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v31
+; FAST-NEXT: mtvsrd v30, r3
+; FAST-NEXT: bl llrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 96
+; FAST-NEXT: vmr v2, v29
+; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 80
+; FAST-NEXT: xxmrghd v3, vs0, v30
+; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: addi r1, r1, 112
+; FAST-NEXT: ld r0, 16(r1)
+; FAST-NEXT: mtlr r0
+; FAST-NEXT: blr
+ %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>)
+
+define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) nounwind {
+; BE-LABEL: llrint_v8i64_v8f128:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -304(r1)
+; BE-NEXT: li r3, 192
+; BE-NEXT: std r0, 320(r1)
+; BE-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 208
+; BE-NEXT: vmr v25, v2
+; BE-NEXT: vmr v2, v3
+; BE-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 224
+; BE-NEXT: vmr v26, v4
+; BE-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 240
+; BE-NEXT: vmr v27, v5
+; BE-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 256
+; BE-NEXT: vmr v28, v6
+; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 272
+; BE-NEXT: vmr v29, v7
+; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 288
+; BE-NEXT: vmr v30, v8
+; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: vmr v31, v9
+; BE-NEXT: bl llrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v25
+; BE-NEXT: std r3, 136(r1)
+; BE-NEXT: bl llrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v27
+; BE-NEXT: std r3, 128(r1)
+; BE-NEXT: bl llrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v26
+; BE-NEXT: std r3, 152(r1)
+; BE-NEXT: bl llrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v29
+; BE-NEXT: std r3, 144(r1)
+; BE-NEXT: bl llrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v28
+; BE-NEXT: std r3, 168(r1)
+; BE-NEXT: bl llrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v31
+; BE-NEXT: std r3, 160(r1)
+; BE-NEXT: bl llrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v30
+; BE-NEXT: std r3, 184(r1)
+; BE-NEXT: bl llrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 176(r1)
+; BE-NEXT: addi r3, r1, 128
+; BE-NEXT: lxvd2x v2, 0, r3
+; BE-NEXT: addi r3, r1, 144
+; BE-NEXT: lxvd2x v3, 0, r3
+; BE-NEXT: addi r3, r1, 160
+; BE-NEXT: lxvd2x v4, 0, r3
+; BE-NEXT: addi r3, r1, 176
+; BE-NEXT: lxvd2x v5, 0, r3
+; BE-NEXT: li r3, 288
+; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 272
+; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 256
+; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 240
+; BE-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 224
+; BE-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 208
+; BE-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 192
+; BE-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: addi r1, r1, 304
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: llrint_v8i64_v8f128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -176(r1)
+; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 192(r1)
+; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 80
+; CHECK-NEXT: vmr v25, v3
+; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 96
+; CHECK-NEXT: vmr v26, v4
+; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 112
+; CHECK-NEXT: vmr v27, v5
+; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 128
+; CHECK-NEXT: vmr v28, v6
+; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 144
+; CHECK-NEXT: vmr v29, v7
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 160
+; CHECK-NEXT: vmr v30, v8
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: vmr v31, v9
+; CHECK-NEXT: bl llrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v25
+; CHECK-NEXT: mtvsrd v24, r3
+; CHECK-NEXT: bl llrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: vmr v2, v26
+; CHECK-NEXT: xxmrghd v25, vs0, v24
+; CHECK-NEXT: bl llrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v27
+; CHECK-NEXT: mtvsrd v26, r3
+; CHECK-NEXT: bl llrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: vmr v2, v28
+; CHECK-NEXT: xxmrghd v27, vs0, v26
+; CHECK-NEXT: bl llrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v29
+; CHECK-NEXT: mtvsrd v28, r3
+; CHECK-NEXT: bl llrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: vmr v2, v30
+; CHECK-NEXT: xxmrghd v29, vs0, v28
+; CHECK-NEXT: bl llrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v31
+; CHECK-NEXT: mtvsrd v30, r3
+; CHECK-NEXT: bl llrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 160
+; CHECK-NEXT: vmr v4, v29
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 144
+; CHECK-NEXT: vmr v3, v27
+; CHECK-NEXT: vmr v2, v25
+; CHECK-NEXT: xxmrghd v5, vs0, v30
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 128
+; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 112
+; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 96
+; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 80
+; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: addi r1, r1, 176
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+;
+; FAST-LABEL: llrint_v8i64_v8f128:
+; FAST: # %bb.0:
+; FAST-NEXT: mflr r0
+; FAST-NEXT: stdu r1, -176(r1)
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: std r0, 192(r1)
+; FAST-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 80
+; FAST-NEXT: vmr v25, v3
+; FAST-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 96
+; FAST-NEXT: vmr v26, v4
+; FAST-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 112
+; FAST-NEXT: vmr v27, v5
+; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 128
+; FAST-NEXT: vmr v28, v6
+; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 144
+; FAST-NEXT: vmr v29, v7
+; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 160
+; FAST-NEXT: vmr v30, v8
+; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: vmr v31, v9
+; FAST-NEXT: bl llrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v25
+; FAST-NEXT: mtvsrd v24, r3
+; FAST-NEXT: bl llrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: vmr v2, v26
+; FAST-NEXT: xxmrghd v25, vs0, v24
+; FAST-NEXT: bl llrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v27
+; FAST-NEXT: mtvsrd v26, r3
+; FAST-NEXT: bl llrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: vmr v2, v28
+; FAST-NEXT: xxmrghd v27, vs0, v26
+; FAST-NEXT: bl llrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v29
+; FAST-NEXT: mtvsrd v28, r3
+; FAST-NEXT: bl llrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: vmr v2, v30
+; FAST-NEXT: xxmrghd v29, vs0, v28
+; FAST-NEXT: bl llrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v31
+; FAST-NEXT: mtvsrd v30, r3
+; FAST-NEXT: bl llrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 160
+; FAST-NEXT: vmr v4, v29
+; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 144
+; FAST-NEXT: vmr v3, v27
+; FAST-NEXT: vmr v2, v25
+; FAST-NEXT: xxmrghd v5, vs0, v30
+; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 128
+; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 112
+; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 96
+; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 80
+; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: addi r1, r1, 176
+; FAST-NEXT: ld r0, 16(r1)
+; FAST-NEXT: mtlr r0
+; FAST-NEXT: blr
+ %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128>)
diff --git a/llvm/test/CodeGen/PowerPC/vector-lrint.ll b/llvm/test/CodeGen/PowerPC/vector-lrint.ll
index c2576d4631db8..6c824be017e81 100644
--- a/llvm/test/CodeGen/PowerPC/vector-lrint.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-lrint.ll
@@ -1,4 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; FIXME: crash "Input type needs to be promoted!"
+; SKIP: sed 's/iXLen/i32/g' %s | llc -ppc-asm-full-reg-names \
+; SKIP: -ppc-vsr-nums-as-vr -mtriple=powerpc-unknown-unknown \
+; SKIP: -verify-machineinstrs | FileCheck %s --check-prefixes=PPC32
; RUN: sed 's/iXLen/i32/g' %s | llc -mcpu=pwr7 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-unknown \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=BE
@@ -9,6 +13,10 @@
; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown \
; RUN: -verify-machineinstrs --enable-unsafe-fp-math | \
; RUN: FileCheck %s --check-prefixes=FAST
+; FIXME: crash "Input type needs to be promoted!"
+; SKIP: sed 's/iXLen/i64/g' %s | llc -ppc-asm-full-reg-names \
+; SKIP: -ppc-vsr-nums-as-vr -mtriple=powerpc-unknown-unknown \
+; SKIP: -verify-machineinstrs | FileCheck %s --check-prefixes=PPC32
; RUN: sed 's/iXLen/i64/g' %s | llc -mcpu=pwr7 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-unknown \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=BE
@@ -20,14 +28,12 @@
; RUN: -verify-machineinstrs --enable-unsafe-fp-math | \
; RUN: FileCheck %s --check-prefixes=FAST
-define <1 x i64> @lrint_v1f16(<1 x half> %x) {
+define <1 x i64> @lrint_v1f16(<1 x half> %x) nounwind {
; BE-LABEL: lrint_v1f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -112(r1)
; BE-NEXT: std r0, 128(r1)
-; BE-NEXT: .cfi_def_cfa_offset 112
-; BE-NEXT: .cfi_offset lr, 16
; BE-NEXT: bl __truncsfhf2
; BE-NEXT: nop
; BE-NEXT: clrldi r3, r3, 48
@@ -45,8 +51,6 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) {
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: std r0, 48(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: bl __truncsfhf2
; CHECK-NEXT: nop
; CHECK-NEXT: clrldi r3, r3, 48
@@ -64,8 +68,6 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) {
; FAST-NEXT: mflr r0
; FAST-NEXT: stdu r1, -32(r1)
; FAST-NEXT: std r0, 48(r1)
-; FAST-NEXT: .cfi_def_cfa_offset 32
-; FAST-NEXT: .cfi_offset lr, 16
; FAST-NEXT: bl __truncsfhf2
; FAST-NEXT: nop
; FAST-NEXT: clrldi r3, r3, 48
@@ -82,16 +84,12 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) {
}
declare <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half>)
-define <2 x i64> @lrint_v2f16(<2 x half> %x) {
+define <2 x i64> @lrint_v2f16(<2 x half> %x) nounwind {
; BE-LABEL: lrint_v2f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -160(r1)
; BE-NEXT: std r0, 176(r1)
-; BE-NEXT: .cfi_def_cfa_offset 160
-; BE-NEXT: .cfi_offset lr, 16
-; BE-NEXT: .cfi_offset r30, -24
-; BE-NEXT: .cfi_offset f31, -8
; BE-NEXT: stfd f31, 152(r1) # 8-byte Folded Spill
; BE-NEXT: fmr f31, f1
; BE-NEXT: fmr f1, f2
@@ -129,17 +127,12 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -96(r1)
-; CHECK-NEXT: std r0, 112(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset r30, -24
-; CHECK-NEXT: .cfi_offset f31, -8
-; CHECK-NEXT: .cfi_offset v31, -48
; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 112(r1)
; CHECK-NEXT: std r30, 72(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f31, 88(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f31, f2
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: bl __truncsfhf2
; CHECK-NEXT: nop
; CHECK-NEXT: fmr f1, f31
@@ -164,7 +157,7 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) {
; CHECK-NEXT: lfd f31, 88(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r30, 72(r1) # 8-byte Folded Reload
; CHECK-NEXT: xxmrghd v2, vs0, v31
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 96
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -173,10 +166,6 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) {
; FAST-LABEL: lrint_v2f16:
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
-; FAST-NEXT: .cfi_def_cfa_offset 48
-; FAST-NEXT: .cfi_offset lr, 16
-; FAST-NEXT: .cfi_offset f30, -16
-; FAST-NEXT: .cfi_offset f31, -8
; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; FAST-NEXT: stdu r1, -48(r1)
@@ -213,20 +202,12 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) {
}
declare <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half>)
-define <4 x i64> @lrint_v4f16(<4 x half> %x) {
+define <4 x i64> @lrint_v4f16(<4 x half> %x) nounwind {
; BE-LABEL: lrint_v4f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -208(r1)
; BE-NEXT: std r0, 224(r1)
-; BE-NEXT: .cfi_def_cfa_offset 208
-; BE-NEXT: .cfi_offset lr, 16
-; BE-NEXT: .cfi_offset r28, -56
-; BE-NEXT: .cfi_offset r29, -48
-; BE-NEXT: .cfi_offset r30, -40
-; BE-NEXT: .cfi_offset f29, -24
-; BE-NEXT: .cfi_offset f30, -16
-; BE-NEXT: .cfi_offset f31, -8
; BE-NEXT: stfd f29, 184(r1) # 8-byte Folded Spill
; BE-NEXT: fmr f29, f1
; BE-NEXT: fmr f1, f2
@@ -300,18 +281,8 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -144(r1)
-; CHECK-NEXT: std r0, 160(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 144
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset r28, -56
-; CHECK-NEXT: .cfi_offset r29, -48
-; CHECK-NEXT: .cfi_offset r30, -40
-; CHECK-NEXT: .cfi_offset f29, -24
-; CHECK-NEXT: .cfi_offset f30, -16
-; CHECK-NEXT: .cfi_offset f31, -8
-; CHECK-NEXT: .cfi_offset v30, -96
-; CHECK-NEXT: .cfi_offset v31, -80
; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 160(r1)
; CHECK-NEXT: std r28, 88(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r29, 96(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r30, 104(r1) # 8-byte Folded Spill
@@ -319,11 +290,11 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) {
; CHECK-NEXT: fmr f29, f2
; CHECK-NEXT: stfd f30, 128(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f30, f3
-; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f31, f4
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: bl __truncsfhf2
; CHECK-NEXT: nop
; CHECK-NEXT: fmr f1, f29
@@ -376,11 +347,11 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) {
; CHECK-NEXT: lfd f29, 120(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r30, 104(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, 96(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
; CHECK-NEXT: ld r28, 88(r1) # 8-byte Folded Reload
; CHECK-NEXT: xxmrghd v3, vs0, v30
-; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 144
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -389,12 +360,6 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) {
; FAST-LABEL: lrint_v4f16:
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
-; FAST-NEXT: .cfi_def_cfa_offset 64
-; FAST-NEXT: .cfi_offset lr, 16
-; FAST-NEXT: .cfi_offset f28, -32
-; FAST-NEXT: .cfi_offset f29, -24
-; FAST-NEXT: .cfi_offset f30, -16
-; FAST-NEXT: .cfi_offset f31, -8
; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
@@ -458,28 +423,12 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) {
}
declare <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half>)
-define <8 x i64> @lrint_v8f16(<8 x half> %x) {
+define <8 x i64> @lrint_v8f16(<8 x half> %x) nounwind {
; BE-LABEL: lrint_v8f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -304(r1)
; BE-NEXT: std r0, 320(r1)
-; BE-NEXT: .cfi_def_cfa_offset 304
-; BE-NEXT: .cfi_offset lr, 16
-; BE-NEXT: .cfi_offset r24, -120
-; BE-NEXT: .cfi_offset r25, -112
-; BE-NEXT: .cfi_offset r26, -104
-; BE-NEXT: .cfi_offset r27, -96
-; BE-NEXT: .cfi_offset r28, -88
-; BE-NEXT: .cfi_offset r29, -80
-; BE-NEXT: .cfi_offset r30, -72
-; BE-NEXT: .cfi_offset f25, -56
-; BE-NEXT: .cfi_offset f26, -48
-; BE-NEXT: .cfi_offset f27, -40
-; BE-NEXT: .cfi_offset f28, -32
-; BE-NEXT: .cfi_offset f29, -24
-; BE-NEXT: .cfi_offset f30, -16
-; BE-NEXT: .cfi_offset f31, -8
; BE-NEXT: stfd f25, 248(r1) # 8-byte Folded Spill
; BE-NEXT: fmr f25, f1
; BE-NEXT: fmr f1, f2
@@ -625,44 +574,24 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -240(r1)
-; CHECK-NEXT: std r0, 256(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 240
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset r24, -120
-; CHECK-NEXT: .cfi_offset r25, -112
-; CHECK-NEXT: .cfi_offset r26, -104
-; CHECK-NEXT: .cfi_offset r27, -96
-; CHECK-NEXT: .cfi_offset r28, -88
-; CHECK-NEXT: .cfi_offset r29, -80
-; CHECK-NEXT: .cfi_offset r30, -72
-; CHECK-NEXT: .cfi_offset f25, -56
-; CHECK-NEXT: .cfi_offset f26, -48
-; CHECK-NEXT: .cfi_offset f27, -40
-; CHECK-NEXT: .cfi_offset f28, -32
-; CHECK-NEXT: .cfi_offset f29, -24
-; CHECK-NEXT: .cfi_offset f30, -16
-; CHECK-NEXT: .cfi_offset f31, -8
-; CHECK-NEXT: .cfi_offset v28, -192
-; CHECK-NEXT: .cfi_offset v29, -176
-; CHECK-NEXT: .cfi_offset v30, -160
-; CHECK-NEXT: .cfi_offset v31, -144
; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 256(r1)
; CHECK-NEXT: std r24, 120(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r25, 128(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r26, 136(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r27, 144(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r28, 152(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r29, 160(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r30, 168(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: std r30, 168(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f25, 184(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f25, f2
; CHECK-NEXT: stfd f26, 192(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f26, f3
; CHECK-NEXT: stfd f27, 200(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f27, f4
-; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 80
; CHECK-NEXT: stfd f28, 208(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f28, f5
@@ -670,11 +599,11 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) {
; CHECK-NEXT: fmr f29, f6
; CHECK-NEXT: stfd f30, 224(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f30, f7
-; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 96
; CHECK-NEXT: stfd f31, 232(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f31, f8
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: bl __truncsfhf2
; CHECK-NEXT: nop
; CHECK-NEXT: fmr f1, f25
@@ -777,7 +706,7 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) {
; CHECK-NEXT: vmr v4, v29
; CHECK-NEXT: lfd f30, 224(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f29, 216(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 80
; CHECK-NEXT: lfd f28, 208(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f27, 200(r1) # 8-byte Folded Reload
@@ -785,7 +714,7 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) {
; CHECK-NEXT: lfd f25, 184(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r30, 168(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, 160(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: ld r28, 152(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r27, 144(r1) # 8-byte Folded Reload
@@ -793,9 +722,9 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) {
; CHECK-NEXT: ld r26, 136(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r25, 128(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r24, 120(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 240
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -804,16 +733,6 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) {
; FAST-LABEL: lrint_v8f16:
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
-; FAST-NEXT: .cfi_def_cfa_offset 96
-; FAST-NEXT: .cfi_offset lr, 16
-; FAST-NEXT: .cfi_offset f24, -64
-; FAST-NEXT: .cfi_offset f25, -56
-; FAST-NEXT: .cfi_offset f26, -48
-; FAST-NEXT: .cfi_offset f27, -40
-; FAST-NEXT: .cfi_offset f28, -32
-; FAST-NEXT: .cfi_offset f29, -24
-; FAST-NEXT: .cfi_offset f30, -16
-; FAST-NEXT: .cfi_offset f31, -8
; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill
@@ -931,44 +850,12 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) {
}
declare <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half>)
-define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
+define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) nounwind {
; BE-LABEL: lrint_v16i64_v16f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -496(r1)
; BE-NEXT: std r0, 512(r1)
-; BE-NEXT: .cfi_def_cfa_offset 496
-; BE-NEXT: .cfi_offset lr, 16
-; BE-NEXT: .cfi_offset r16, -248
-; BE-NEXT: .cfi_offset r17, -240
-; BE-NEXT: .cfi_offset r18, -232
-; BE-NEXT: .cfi_offset r19, -224
-; BE-NEXT: .cfi_offset r20, -216
-; BE-NEXT: .cfi_offset r21, -208
-; BE-NEXT: .cfi_offset r22, -200
-; BE-NEXT: .cfi_offset r23, -192
-; BE-NEXT: .cfi_offset r24, -184
-; BE-NEXT: .cfi_offset r25, -176
-; BE-NEXT: .cfi_offset r26, -168
-; BE-NEXT: .cfi_offset r27, -160
-; BE-NEXT: .cfi_offset r28, -152
-; BE-NEXT: .cfi_offset r29, -144
-; BE-NEXT: .cfi_offset r30, -136
-; BE-NEXT: .cfi_offset f17, -120
-; BE-NEXT: .cfi_offset f18, -112
-; BE-NEXT: .cfi_offset f19, -104
-; BE-NEXT: .cfi_offset f20, -96
-; BE-NEXT: .cfi_offset f21, -88
-; BE-NEXT: .cfi_offset f22, -80
-; BE-NEXT: .cfi_offset f23, -72
-; BE-NEXT: .cfi_offset f24, -64
-; BE-NEXT: .cfi_offset f25, -56
-; BE-NEXT: .cfi_offset f26, -48
-; BE-NEXT: .cfi_offset f27, -40
-; BE-NEXT: .cfi_offset f28, -32
-; BE-NEXT: .cfi_offset f29, -24
-; BE-NEXT: .cfi_offset f30, -16
-; BE-NEXT: .cfi_offset f31, -8
; BE-NEXT: stfd f20, 400(r1) # 8-byte Folded Spill
; BE-NEXT: fmr f20, f1
; BE-NEXT: fmr f1, f2
@@ -1255,105 +1142,65 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -432(r1)
-; CHECK-NEXT: std r0, 448(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 432
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset r16, -248
-; CHECK-NEXT: .cfi_offset r17, -240
-; CHECK-NEXT: .cfi_offset r18, -232
-; CHECK-NEXT: .cfi_offset r19, -224
-; CHECK-NEXT: .cfi_offset r20, -216
-; CHECK-NEXT: .cfi_offset r21, -208
-; CHECK-NEXT: .cfi_offset r22, -200
-; CHECK-NEXT: .cfi_offset r23, -192
-; CHECK-NEXT: .cfi_offset r24, -184
-; CHECK-NEXT: .cfi_offset r25, -176
-; CHECK-NEXT: .cfi_offset r26, -168
-; CHECK-NEXT: .cfi_offset r27, -160
-; CHECK-NEXT: .cfi_offset r28, -152
-; CHECK-NEXT: .cfi_offset r29, -144
-; CHECK-NEXT: .cfi_offset r30, -136
-; CHECK-NEXT: .cfi_offset f17, -120
-; CHECK-NEXT: .cfi_offset f18, -112
-; CHECK-NEXT: .cfi_offset f19, -104
-; CHECK-NEXT: .cfi_offset f20, -96
-; CHECK-NEXT: .cfi_offset f21, -88
-; CHECK-NEXT: .cfi_offset f22, -80
-; CHECK-NEXT: .cfi_offset f23, -72
-; CHECK-NEXT: .cfi_offset f24, -64
-; CHECK-NEXT: .cfi_offset f25, -56
-; CHECK-NEXT: .cfi_offset f26, -48
-; CHECK-NEXT: .cfi_offset f27, -40
-; CHECK-NEXT: .cfi_offset f28, -32
-; CHECK-NEXT: .cfi_offset f29, -24
-; CHECK-NEXT: .cfi_offset f30, -16
-; CHECK-NEXT: .cfi_offset f31, -8
-; CHECK-NEXT: .cfi_offset v24, -384
-; CHECK-NEXT: .cfi_offset v25, -368
-; CHECK-NEXT: .cfi_offset v26, -352
-; CHECK-NEXT: .cfi_offset v27, -336
-; CHECK-NEXT: .cfi_offset v28, -320
-; CHECK-NEXT: .cfi_offset v29, -304
-; CHECK-NEXT: .cfi_offset v30, -288
-; CHECK-NEXT: .cfi_offset v31, -272
; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 448(r1)
; CHECK-NEXT: std r16, 184(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r17, 192(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r18, 200(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r19, 208(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r20, 216(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r21, 224(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r22, 232(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: std r22, 232(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r23, 240(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r24, 248(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r25, 256(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r26, 264(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r27, 272(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r28, 280(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 80
+; CHECK-NEXT: std r28, 280(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r29, 288(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r30, 296(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f17, 312(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f18, 320(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f19, 328(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 96
; CHECK-NEXT: stfd f20, 336(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f20, f2
-; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 96
; CHECK-NEXT: stfd f21, 344(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f21, f3
; CHECK-NEXT: stfd f22, 352(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f22, f4
+; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 112
; CHECK-NEXT: stfd f23, 360(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f23, f5
-; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 112
; CHECK-NEXT: stfd f24, 368(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f24, f6
; CHECK-NEXT: stfd f25, 376(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f25, f7
+; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 128
; CHECK-NEXT: stfd f26, 384(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f26, f8
-; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 128
; CHECK-NEXT: stfd f27, 392(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f27, f9
; CHECK-NEXT: stfd f28, 400(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f28, f10
+; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 144
; CHECK-NEXT: stfd f29, 408(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f29, f11
-; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 144
; CHECK-NEXT: stfd f30, 416(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f30, f12
; CHECK-NEXT: stfd f31, 424(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f31, f13
-; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 160
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: bl __truncsfhf2
; CHECK-NEXT: nop
; CHECK-NEXT: fmr f1, f20
@@ -1556,7 +1403,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
; CHECK-NEXT: vmr v4, v29
; CHECK-NEXT: lfd f30, 416(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f29, 408(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 144
; CHECK-NEXT: vmr v5, v28
; CHECK-NEXT: vmr v6, v27
@@ -1564,7 +1411,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
; CHECK-NEXT: vmr v8, v25
; CHECK-NEXT: lfd f28, 400(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f27, 392(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 128
; CHECK-NEXT: lfd f26, 384(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f25, 376(r1) # 8-byte Folded Reload
@@ -1572,7 +1419,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
; CHECK-NEXT: lfd f24, 368(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f23, 360(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f22, 352(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 112
; CHECK-NEXT: lfd f21, 344(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r30, 296(r1) # 8-byte Folded Reload
@@ -1580,7 +1427,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
; CHECK-NEXT: lfd f19, 328(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, 288(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r28, 280(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 96
; CHECK-NEXT: lfd f18, 320(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r27, 272(r1) # 8-byte Folded Reload
@@ -1588,7 +1435,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
; CHECK-NEXT: ld r26, 264(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r25, 256(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r24, 248(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 80
; CHECK-NEXT: ld r23, 240(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r22, 232(r1) # 8-byte Folded Reload
@@ -1596,13 +1443,13 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
; CHECK-NEXT: ld r20, 216(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r19, 208(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r18, 200(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: ld r17, 192(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r16, 184(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 432
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -1611,24 +1458,6 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
; FAST-LABEL: lrint_v16i64_v16f16:
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
-; FAST-NEXT: .cfi_def_cfa_offset 160
-; FAST-NEXT: .cfi_offset lr, 16
-; FAST-NEXT: .cfi_offset f16, -128
-; FAST-NEXT: .cfi_offset f17, -120
-; FAST-NEXT: .cfi_offset f18, -112
-; FAST-NEXT: .cfi_offset f19, -104
-; FAST-NEXT: .cfi_offset f20, -96
-; FAST-NEXT: .cfi_offset f21, -88
-; FAST-NEXT: .cfi_offset f22, -80
-; FAST-NEXT: .cfi_offset f23, -72
-; FAST-NEXT: .cfi_offset f24, -64
-; FAST-NEXT: .cfi_offset f25, -56
-; FAST-NEXT: .cfi_offset f26, -48
-; FAST-NEXT: .cfi_offset f27, -40
-; FAST-NEXT: .cfi_offset f28, -32
-; FAST-NEXT: .cfi_offset f29, -24
-; FAST-NEXT: .cfi_offset f30, -16
-; FAST-NEXT: .cfi_offset f31, -8
; FAST-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill
@@ -1852,50 +1681,12 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
}
declare <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half>)
-define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
+define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind {
; BE-LABEL: lrint_v32i64_v32f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -864(r1)
; BE-NEXT: std r0, 880(r1)
-; BE-NEXT: .cfi_def_cfa_offset 864
-; BE-NEXT: .cfi_offset lr, 16
-; BE-NEXT: .cfi_offset r14, -288
-; BE-NEXT: .cfi_offset r15, -280
-; BE-NEXT: .cfi_offset r16, -272
-; BE-NEXT: .cfi_offset r17, -264
-; BE-NEXT: .cfi_offset r18, -256
-; BE-NEXT: .cfi_offset r19, -248
-; BE-NEXT: .cfi_offset r20, -240
-; BE-NEXT: .cfi_offset r21, -232
-; BE-NEXT: .cfi_offset r22, -224
-; BE-NEXT: .cfi_offset r23, -216
-; BE-NEXT: .cfi_offset r24, -208
-; BE-NEXT: .cfi_offset r25, -200
-; BE-NEXT: .cfi_offset r26, -192
-; BE-NEXT: .cfi_offset r27, -184
-; BE-NEXT: .cfi_offset r28, -176
-; BE-NEXT: .cfi_offset r29, -168
-; BE-NEXT: .cfi_offset r30, -160
-; BE-NEXT: .cfi_offset r31, -152
-; BE-NEXT: .cfi_offset f14, -144
-; BE-NEXT: .cfi_offset f15, -136
-; BE-NEXT: .cfi_offset f16, -128
-; BE-NEXT: .cfi_offset f17, -120
-; BE-NEXT: .cfi_offset f18, -112
-; BE-NEXT: .cfi_offset f19, -104
-; BE-NEXT: .cfi_offset f20, -96
-; BE-NEXT: .cfi_offset f21, -88
-; BE-NEXT: .cfi_offset f22, -80
-; BE-NEXT: .cfi_offset f23, -72
-; BE-NEXT: .cfi_offset f24, -64
-; BE-NEXT: .cfi_offset f25, -56
-; BE-NEXT: .cfi_offset f26, -48
-; BE-NEXT: .cfi_offset f27, -40
-; BE-NEXT: .cfi_offset f28, -32
-; BE-NEXT: .cfi_offset f29, -24
-; BE-NEXT: .cfi_offset f30, -16
-; BE-NEXT: .cfi_offset f31, -8
; BE-NEXT: stfd f20, 768(r1) # 8-byte Folded Spill
; BE-NEXT: fmr f20, f1
; BE-NEXT: fmr f1, f2
@@ -1935,6 +1726,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
; BE-NEXT: stfd f30, 848(r1) # 8-byte Folded Spill
; BE-NEXT: stfd f31, 856(r1) # 8-byte Folded Spill
; BE-NEXT: fmr f31, f13
+; BE-NEXT: mr r30, r3
; BE-NEXT: fmr f29, f12
; BE-NEXT: fmr f30, f11
; BE-NEXT: fmr f28, f10
@@ -1945,7 +1737,6 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
; BE-NEXT: fmr f23, f5
; BE-NEXT: fmr f22, f4
; BE-NEXT: fmr f21, f3
-; BE-NEXT: mr r30, r3
; BE-NEXT: bl __truncsfhf2
; BE-NEXT: nop
; BE-NEXT: fmr f1, f20
@@ -2448,98 +2239,48 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -688(r1)
-; CHECK-NEXT: std r0, 704(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 688
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset r14, -288
-; CHECK-NEXT: .cfi_offset r15, -280
-; CHECK-NEXT: .cfi_offset r16, -272
-; CHECK-NEXT: .cfi_offset r17, -264
-; CHECK-NEXT: .cfi_offset r18, -256
-; CHECK-NEXT: .cfi_offset r19, -248
-; CHECK-NEXT: .cfi_offset r20, -240
-; CHECK-NEXT: .cfi_offset r21, -232
-; CHECK-NEXT: .cfi_offset r22, -224
-; CHECK-NEXT: .cfi_offset r23, -216
-; CHECK-NEXT: .cfi_offset r24, -208
-; CHECK-NEXT: .cfi_offset r25, -200
-; CHECK-NEXT: .cfi_offset r26, -192
-; CHECK-NEXT: .cfi_offset r27, -184
-; CHECK-NEXT: .cfi_offset r28, -176
-; CHECK-NEXT: .cfi_offset r29, -168
-; CHECK-NEXT: .cfi_offset r30, -160
-; CHECK-NEXT: .cfi_offset r31, -152
-; CHECK-NEXT: .cfi_offset f14, -144
-; CHECK-NEXT: .cfi_offset f15, -136
-; CHECK-NEXT: .cfi_offset f16, -128
-; CHECK-NEXT: .cfi_offset f17, -120
-; CHECK-NEXT: .cfi_offset f18, -112
-; CHECK-NEXT: .cfi_offset f19, -104
-; CHECK-NEXT: .cfi_offset f20, -96
-; CHECK-NEXT: .cfi_offset f21, -88
-; CHECK-NEXT: .cfi_offset f22, -80
-; CHECK-NEXT: .cfi_offset f23, -72
-; CHECK-NEXT: .cfi_offset f24, -64
-; CHECK-NEXT: .cfi_offset f25, -56
-; CHECK-NEXT: .cfi_offset f26, -48
-; CHECK-NEXT: .cfi_offset f27, -40
-; CHECK-NEXT: .cfi_offset f28, -32
-; CHECK-NEXT: .cfi_offset f29, -24
-; CHECK-NEXT: .cfi_offset f30, -16
-; CHECK-NEXT: .cfi_offset f31, -8
-; CHECK-NEXT: .cfi_offset v20, -480
-; CHECK-NEXT: .cfi_offset v21, -464
-; CHECK-NEXT: .cfi_offset v22, -448
-; CHECK-NEXT: .cfi_offset v23, -432
-; CHECK-NEXT: .cfi_offset v24, -416
-; CHECK-NEXT: .cfi_offset v25, -400
-; CHECK-NEXT: .cfi_offset v26, -384
-; CHECK-NEXT: .cfi_offset v27, -368
-; CHECK-NEXT: .cfi_offset v28, -352
-; CHECK-NEXT: .cfi_offset v29, -336
-; CHECK-NEXT: .cfi_offset v30, -320
-; CHECK-NEXT: .cfi_offset v31, -304
; CHECK-NEXT: li r4, 208
+; CHECK-NEXT: std r0, 704(r1)
; CHECK-NEXT: std r14, 400(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r15, 408(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r16, 416(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r17, 424(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r18, 432(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r19, 440(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r20, 448(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stvx v20, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 224
+; CHECK-NEXT: std r20, 448(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r21, 456(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r22, 464(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r23, 472(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r24, 480(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r25, 488(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r26, 496(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 240
+; CHECK-NEXT: std r26, 496(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r27, 504(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r28, 512(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r29, 520(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r30, 528(r1) # 8-byte Folded Spill
; CHECK-NEXT: mr r30, r3
-; CHECK-NEXT: std r31, 536(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 256
+; CHECK-NEXT: std r31, 536(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f14, 544(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f15, 552(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f16, 560(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f17, 568(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f18, 576(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f19, 584(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 272
+; CHECK-NEXT: stfd f19, 584(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f20, 592(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f20, f2
; CHECK-NEXT: stfd f21, 600(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f21, f3
; CHECK-NEXT: stfd f22, 608(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f22, f4
-; CHECK-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 288
; CHECK-NEXT: stfd f23, 616(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f23, f5
@@ -2547,7 +2288,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
; CHECK-NEXT: fmr f24, f6
; CHECK-NEXT: stfd f25, 632(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f25, f7
-; CHECK-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 304
; CHECK-NEXT: stfd f26, 640(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f26, f8
@@ -2555,7 +2296,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
; CHECK-NEXT: fmr f27, f9
; CHECK-NEXT: stfd f28, 656(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f28, f10
-; CHECK-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 320
; CHECK-NEXT: stfd f29, 664(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f29, f11
@@ -2563,15 +2304,15 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
; CHECK-NEXT: fmr f30, f12
; CHECK-NEXT: stfd f31, 680(r1) # 8-byte Folded Spill
; CHECK-NEXT: fmr f31, f13
-; CHECK-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 336
-; CHECK-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 352
-; CHECK-NEXT: stvx v29, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 368
-; CHECK-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: li r4, 384
-; CHECK-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill
; CHECK-NEXT: bl __truncsfhf2
; CHECK-NEXT: nop
; CHECK-NEXT: fmr f1, f20
@@ -3050,7 +2791,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
; CHECK-NEXT: li r3, 384
; CHECK-NEXT: xxswapd vs4, vs4
; CHECK-NEXT: stxvd2x vs4, 0, r30
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 368
; CHECK-NEXT: lfd f31, 680(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f30, 672(r1) # 8-byte Folded Reload
@@ -3068,7 +2809,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
; CHECK-NEXT: lfd f18, 576(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f17, 568(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f16, 560(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 352
; CHECK-NEXT: lfd f15, 552(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f14, 544(r1) # 8-byte Folded Reload
@@ -3076,7 +2817,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
; CHECK-NEXT: ld r30, 528(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, 520(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r28, 512(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 336
; CHECK-NEXT: ld r27, 504(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r26, 496(r1) # 8-byte Folded Reload
@@ -3084,7 +2825,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
; CHECK-NEXT: ld r24, 480(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r23, 472(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r22, 464(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 320
; CHECK-NEXT: ld r21, 456(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r20, 448(r1) # 8-byte Folded Reload
@@ -3092,23 +2833,23 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
; CHECK-NEXT: ld r18, 432(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r17, 424(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r16, 416(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 304
; CHECK-NEXT: ld r15, 408(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r14, 400(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 288
-; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 272
-; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 256
-; CHECK-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 240
-; CHECK-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 224
-; CHECK-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 208
-; CHECK-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 688
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -3118,95 +2859,62 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
; FAST-NEXT: stdu r1, -480(r1)
-; FAST-NEXT: std r0, 496(r1)
-; FAST-NEXT: .cfi_def_cfa_offset 480
-; FAST-NEXT: .cfi_offset lr, 16
-; FAST-NEXT: .cfi_offset r30, -160
-; FAST-NEXT: .cfi_offset f14, -144
-; FAST-NEXT: .cfi_offset f15, -136
-; FAST-NEXT: .cfi_offset f16, -128
-; FAST-NEXT: .cfi_offset f17, -120
-; FAST-NEXT: .cfi_offset f18, -112
-; FAST-NEXT: .cfi_offset f19, -104
-; FAST-NEXT: .cfi_offset f20, -96
-; FAST-NEXT: .cfi_offset f21, -88
-; FAST-NEXT: .cfi_offset f22, -80
-; FAST-NEXT: .cfi_offset f23, -72
-; FAST-NEXT: .cfi_offset f24, -64
-; FAST-NEXT: .cfi_offset f25, -56
-; FAST-NEXT: .cfi_offset f26, -48
-; FAST-NEXT: .cfi_offset f27, -40
-; FAST-NEXT: .cfi_offset f28, -32
-; FAST-NEXT: .cfi_offset f29, -24
-; FAST-NEXT: .cfi_offset f30, -16
-; FAST-NEXT: .cfi_offset f31, -8
-; FAST-NEXT: .cfi_offset v20, -352
-; FAST-NEXT: .cfi_offset v21, -336
-; FAST-NEXT: .cfi_offset v22, -320
-; FAST-NEXT: .cfi_offset v23, -304
-; FAST-NEXT: .cfi_offset v24, -288
-; FAST-NEXT: .cfi_offset v25, -272
-; FAST-NEXT: .cfi_offset v26, -256
-; FAST-NEXT: .cfi_offset v27, -240
-; FAST-NEXT: .cfi_offset v28, -224
-; FAST-NEXT: .cfi_offset v29, -208
-; FAST-NEXT: .cfi_offset v30, -192
-; FAST-NEXT: .cfi_offset v31, -176
; FAST-NEXT: li r4, 128
+; FAST-NEXT: std r0, 496(r1)
; FAST-NEXT: std r30, 320(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r30, r3
; FAST-NEXT: stfd f14, 336(r1) # 8-byte Folded Spill
-; FAST-NEXT: fmr f14, f5
; FAST-NEXT: stfd f15, 344(r1) # 8-byte Folded Spill
+; FAST-NEXT: fmr f14, f5
; FAST-NEXT: stfd f16, 352(r1) # 8-byte Folded Spill
-; FAST-NEXT: fmr f16, f4
-; FAST-NEXT: mr r30, r3
-; FAST-NEXT: stvx v20, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 144
+; FAST-NEXT: fmr f16, f4
; FAST-NEXT: stfd f17, 360(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f18, 368(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f19, 376(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f20, 384(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f21, 392(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f22, 400(r1) # 8-byte Folded Spill
-; FAST-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 160
+; FAST-NEXT: stfd f22, 400(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f23, 408(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f24, 416(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f25, 424(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f26, 432(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f27, 440(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill
-; FAST-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 176
; FAST-NEXT: xxlor v22, f3, f3
+; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f29, 456(r1) # 8-byte Folded Spill
; FAST-NEXT: fmr f29, f9
; FAST-NEXT: stfd f30, 464(r1) # 8-byte Folded Spill
; FAST-NEXT: stfd f31, 472(r1) # 8-byte Folded Spill
-; FAST-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 192
; FAST-NEXT: xxlor v23, f2, f2
-; FAST-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 208
-; FAST-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 224
; FAST-NEXT: xxlor v25, f13, f13
-; FAST-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 240
; FAST-NEXT: xxlor v26, f12, f12
-; FAST-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 256
; FAST-NEXT: xxlor v27, f11, f11
-; FAST-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 272
; FAST-NEXT: xxlor v28, f10, f10
-; FAST-NEXT: stvx v29, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 288
; FAST-NEXT: xxlor v29, f8, f8
-; FAST-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 304
; FAST-NEXT: xxlor v30, f7, f7
-; FAST-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill
; FAST-NEXT: li r4, 44
; FAST-NEXT: xxlor v31, f6, f6
; FAST-NEXT: stxsspx f1, r1, r4 # 4-byte Folded Spill
@@ -3635,30 +3343,30 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
; FAST-NEXT: lfd f16, 352(r1) # 8-byte Folded Reload
; FAST-NEXT: lfd f15, 344(r1) # 8-byte Folded Reload
; FAST-NEXT: lfd f14, 336(r1) # 8-byte Folded Reload
-; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 288
; FAST-NEXT: ld r30, 320(r1) # 8-byte Folded Reload
-; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 272
-; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 256
-; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 240
-; FAST-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 224
-; FAST-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 208
-; FAST-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 192
-; FAST-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 176
-; FAST-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 160
-; FAST-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 144
-; FAST-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: li r3, 128
-; FAST-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload
; FAST-NEXT: addi r1, r1, 480
; FAST-NEXT: ld r0, 16(r1)
; FAST-NEXT: mtlr r0
@@ -3668,14 +3376,12 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
}
declare <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half>)
-define <1 x i64> @lrint_v1f32(<1 x float> %x) {
+define <1 x i64> @lrint_v1f32(<1 x float> %x) nounwind {
; BE-LABEL: lrint_v1f32:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -112(r1)
; BE-NEXT: std r0, 128(r1)
-; BE-NEXT: .cfi_def_cfa_offset 112
-; BE-NEXT: .cfi_offset lr, 16
; BE-NEXT: bl lrintf
; BE-NEXT: nop
; BE-NEXT: addi r1, r1, 112
@@ -3688,8 +3394,6 @@ define <1 x i64> @lrint_v1f32(<1 x float> %x) {
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: std r0, 48(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
@@ -3707,15 +3411,13 @@ define <1 x i64> @lrint_v1f32(<1 x float> %x) {
}
declare <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float>)
-define <2 x i64> @lrint_v2f32(<2 x float> %x) {
+define <2 x i64> @lrint_v2f32(<2 x float> %x) nounwind {
; BE-LABEL: lrint_v2f32:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -144(r1)
-; BE-NEXT: std r0, 160(r1)
-; BE-NEXT: .cfi_def_cfa_offset 144
-; BE-NEXT: .cfi_offset lr, 16
; BE-NEXT: addi r3, r1, 112
+; BE-NEXT: std r0, 160(r1)
; BE-NEXT: stxvw4x v2, 0, r3
; BE-NEXT: lfs f1, 116(r1)
; BE-NEXT: bl lrintf
@@ -3736,14 +3438,11 @@ define <2 x i64> @lrint_v2f32(<2 x float> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -64(r1)
-; CHECK-NEXT: std r0, 80(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset v31, -16
; CHECK-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 80(r1)
; CHECK-NEXT: xscvspdpn f1, vs0
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v2
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
@@ -3755,7 +3454,7 @@ define <2 x i64> @lrint_v2f32(<2 x float> %x) {
; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: li r3, 48
; CHECK-NEXT: xxmrghd v2, vs0, v31
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -3780,15 +3479,13 @@ define <2 x i64> @lrint_v2f32(<2 x float> %x) {
}
declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float>)
-define <4 x i64> @lrint_v4f32(<4 x float> %x) {
+define <4 x i64> @lrint_v4f32(<4 x float> %x) nounwind {
; BE-LABEL: lrint_v4f32:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -160(r1)
-; BE-NEXT: std r0, 176(r1)
-; BE-NEXT: .cfi_def_cfa_offset 160
-; BE-NEXT: .cfi_offset lr, 16
; BE-NEXT: addi r3, r1, 112
+; BE-NEXT: std r0, 176(r1)
; BE-NEXT: stxvw4x v2, 0, r3
; BE-NEXT: lfs f1, 116(r1)
; BE-NEXT: bl lrintf
@@ -3819,17 +3516,13 @@ define <4 x i64> @lrint_v4f32(<4 x float> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -80(r1)
-; CHECK-NEXT: std r0, 96(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset v30, -32
-; CHECK-NEXT: .cfi_offset v31, -16
; CHECK-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 96(r1)
; CHECK-NEXT: xscvspdpn f1, vs0
-; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v2
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
@@ -3852,9 +3545,9 @@ define <4 x i64> @lrint_v4f32(<4 x float> %x) {
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: vmr v2, v30
; CHECK-NEXT: xxmrghd v3, v31, vs0
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 80
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -3890,15 +3583,13 @@ define <4 x i64> @lrint_v4f32(<4 x float> %x) {
}
declare <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float>)
-define <8 x i64> @lrint_v8f32(<8 x float> %x) {
+define <8 x i64> @lrint_v8f32(<8 x float> %x) nounwind {
; BE-LABEL: lrint_v8f32:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -208(r1)
-; BE-NEXT: std r0, 224(r1)
-; BE-NEXT: .cfi_def_cfa_offset 208
-; BE-NEXT: .cfi_offset lr, 16
; BE-NEXT: addi r3, r1, 112
+; BE-NEXT: std r0, 224(r1)
; BE-NEXT: stxvw4x v2, 0, r3
; BE-NEXT: addi r3, r1, 128
; BE-NEXT: stxvw4x v3, 0, r3
@@ -3951,24 +3642,18 @@ define <8 x i64> @lrint_v8f32(<8 x float> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -112(r1)
-; CHECK-NEXT: std r0, 128(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 112
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset v28, -64
-; CHECK-NEXT: .cfi_offset v29, -48
-; CHECK-NEXT: .cfi_offset v30, -32
-; CHECK-NEXT: .cfi_offset v31, -16
; CHECK-NEXT: li r3, 48
; CHECK-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: std r0, 128(r1)
+; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: xscvspdpn f1, vs0
-; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 80
-; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 96
; CHECK-NEXT: vmr v30, v2
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v3
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
@@ -4014,13 +3699,13 @@ define <8 x i64> @lrint_v8f32(<8 x float> %x) {
; CHECK-NEXT: vmr v2, v29
; CHECK-NEXT: vmr v4, v28
; CHECK-NEXT: xxmrghd v5, v31, vs0
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 80
-; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 112
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -4078,15 +3763,13 @@ define <8 x i64> @lrint_v8f32(<8 x float> %x) {
}
declare <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float>)
-define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) {
+define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) nounwind {
; BE-LABEL: lrint_v16i64_v16f32:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -304(r1)
-; BE-NEXT: std r0, 320(r1)
-; BE-NEXT: .cfi_def_cfa_offset 304
-; BE-NEXT: .cfi_offset lr, 16
; BE-NEXT: addi r3, r1, 112
+; BE-NEXT: std r0, 320(r1)
; BE-NEXT: stxvw4x v2, 0, r3
; BE-NEXT: addi r3, r1, 128
; BE-NEXT: stxvw4x v3, 0, r3
@@ -4183,38 +3866,28 @@ define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -176(r1)
-; CHECK-NEXT: std r0, 192(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 176
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset v24, -128
-; CHECK-NEXT: .cfi_offset v25, -112
-; CHECK-NEXT: .cfi_offset v26, -96
-; CHECK-NEXT: .cfi_offset v27, -80
-; CHECK-NEXT: .cfi_offset v28, -64
-; CHECK-NEXT: .cfi_offset v29, -48
-; CHECK-NEXT: .cfi_offset v30, -32
-; CHECK-NEXT: .cfi_offset v31, -16
; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 192(r1)
; CHECK-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: xscvspdpn f1, vs0
-; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 80
-; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 96
; CHECK-NEXT: vmr v26, v3
-; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 112
-; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 128
; CHECK-NEXT: vmr v28, v4
-; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 144
; CHECK-NEXT: vmr v29, v2
-; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 160
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v5
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
@@ -4306,21 +3979,21 @@ define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) {
; CHECK-NEXT: vmr v6, v25
; CHECK-NEXT: vmr v8, v24
; CHECK-NEXT: xxmrghd v9, v31, vs0
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 144
-; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 128
-; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 112
-; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 96
-; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 80
-; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 176
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -4422,14 +4095,12 @@ define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) {
}
declare <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float>)
-define <1 x i64> @lrint_v1f64(<1 x double> %x) {
+define <1 x i64> @lrint_v1f64(<1 x double> %x) nounwind {
; BE-LABEL: lrint_v1f64:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -112(r1)
; BE-NEXT: std r0, 128(r1)
-; BE-NEXT: .cfi_def_cfa_offset 112
-; BE-NEXT: .cfi_offset lr, 16
; BE-NEXT: bl lrint
; BE-NEXT: nop
; BE-NEXT: addi r1, r1, 112
@@ -4442,8 +4113,6 @@ define <1 x i64> @lrint_v1f64(<1 x double> %x) {
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: std r0, 48(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: bl lrint
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
@@ -4461,16 +4130,13 @@ define <1 x i64> @lrint_v1f64(<1 x double> %x) {
}
declare <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double>)
-define <2 x i64> @lrint_v2f64(<2 x double> %x) {
+define <2 x i64> @lrint_v2f64(<2 x double> %x) nounwind {
; BE-LABEL: lrint_v2f64:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -160(r1)
-; BE-NEXT: std r0, 176(r1)
-; BE-NEXT: .cfi_def_cfa_offset 160
-; BE-NEXT: .cfi_offset lr, 16
-; BE-NEXT: .cfi_offset v31, -16
; BE-NEXT: li r3, 144
+; BE-NEXT: std r0, 176(r1)
; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; BE-NEXT: vmr v31, v2
; BE-NEXT: xxlor f1, v31, v31
@@ -4494,12 +4160,9 @@ define <2 x i64> @lrint_v2f64(<2 x double> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -64(r1)
-; CHECK-NEXT: std r0, 80(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset v31, -16
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: std r0, 80(r1)
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v2
; CHECK-NEXT: xxlor f1, v31, v31
; CHECK-NEXT: bl lrint
@@ -4511,7 +4174,7 @@ define <2 x i64> @lrint_v2f64(<2 x double> %x) {
; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: li r3, 48
; CHECK-NEXT: xxmrghd v2, v31, vs0
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -4534,17 +4197,13 @@ define <2 x i64> @lrint_v2f64(<2 x double> %x) {
}
declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>)
-define <4 x i64> @lrint_v4f64(<4 x double> %x) {
+define <4 x i64> @lrint_v4f64(<4 x double> %x) nounwind {
; BE-LABEL: lrint_v4f64:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -192(r1)
-; BE-NEXT: std r0, 208(r1)
-; BE-NEXT: .cfi_def_cfa_offset 192
-; BE-NEXT: .cfi_offset lr, 16
-; BE-NEXT: .cfi_offset v30, -32
-; BE-NEXT: .cfi_offset v31, -16
; BE-NEXT: li r3, 160
+; BE-NEXT: std r0, 208(r1)
; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; BE-NEXT: vmr v30, v2
; BE-NEXT: li r3, 176
@@ -4583,17 +4242,13 @@ define <4 x i64> @lrint_v4f64(<4 x double> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -80(r1)
-; CHECK-NEXT: std r0, 96(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset v30, -32
-; CHECK-NEXT: .cfi_offset v31, -16
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: std r0, 96(r1)
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: vmr v30, v2
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: xxlor f1, v30, v30
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v3
; CHECK-NEXT: bl lrint
; CHECK-NEXT: nop
@@ -4614,9 +4269,9 @@ define <4 x i64> @lrint_v4f64(<4 x double> %x) {
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: vmr v2, v30
; CHECK-NEXT: xxmrghd v3, v31, vs0
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 80
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -4648,25 +4303,19 @@ define <4 x i64> @lrint_v4f64(<4 x double> %x) {
}
declare <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double>)
-define <8 x i64> @lrint_v8f64(<8 x double> %x) {
+define <8 x i64> @lrint_v8f64(<8 x double> %x) nounwind {
; BE-LABEL: lrint_v8f64:
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -256(r1)
-; BE-NEXT: std r0, 272(r1)
-; BE-NEXT: .cfi_def_cfa_offset 256
-; BE-NEXT: .cfi_offset lr, 16
-; BE-NEXT: .cfi_offset v28, -64
-; BE-NEXT: .cfi_offset v29, -48
-; BE-NEXT: .cfi_offset v30, -32
-; BE-NEXT: .cfi_offset v31, -16
; BE-NEXT: li r3, 192
+; BE-NEXT: std r0, 272(r1)
; BE-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
; BE-NEXT: li r3, 208
; BE-NEXT: vmr v28, v2
-; BE-NEXT: xxlor f1, v28, v28
; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
; BE-NEXT: li r3, 224
+; BE-NEXT: xxlor f1, v28, v28
; BE-NEXT: vmr v29, v3
; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; BE-NEXT: li r3, 240
@@ -4729,25 +4378,19 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -112(r1)
-; CHECK-NEXT: std r0, 128(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 112
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset v28, -64
-; CHECK-NEXT: .cfi_offset v29, -48
-; CHECK-NEXT: .cfi_offset v30, -32
-; CHECK-NEXT: .cfi_offset v31, -16
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: std r0, 128(r1)
+; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: vmr v28, v2
-; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 80
; CHECK-NEXT: xxlor f1, v28, v28
; CHECK-NEXT: vmr v29, v3
-; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: li r3, 96
; CHECK-NEXT: vmr v30, v4
-; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v5
; CHECK-NEXT: bl lrint
; CHECK-NEXT: nop
@@ -4788,13 +4431,13 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) {
; CHECK-NEXT: vmr v3, v29
; CHECK-NEXT: vmr v2, v28
; CHECK-NEXT: xxmrghd v5, v31, vs0
-; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 80
-; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 112
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -4843,3 +4486,2177 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) {
ret <8 x i64> %a
}
declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>)
+
+define <1 x i64> @lrint_v1f128(<1 x fp128> %x) nounwind {
+; BE-LABEL: lrint_v1f128:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: lrint_v1f128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -32(r1)
+; CHECK-NEXT: std r0, 48(r1)
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: addi r1, r1, 32
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+;
+; FAST-LABEL: lrint_v1f128:
+; FAST: # %bb.0:
+; FAST-NEXT: mflr r0
+; FAST-NEXT: stdu r1, -32(r1)
+; FAST-NEXT: std r0, 48(r1)
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: addi r1, r1, 32
+; FAST-NEXT: ld r0, 16(r1)
+; FAST-NEXT: mtlr r0
+; FAST-NEXT: blr
+ %a = call <1 x i64> @llvm.lrint.v1i64.v1f128(<1 x fp128> %x)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.lrint.v1i64.v1f128(<1 x fp128>)
+
+define <2 x i64> @lrint_v2f128(<2 x fp128> %x) nounwind {
+; BE-LABEL: lrint_v2f128:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -160(r1)
+; BE-NEXT: li r3, 144
+; BE-NEXT: std r0, 176(r1)
+; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: vmr v31, v2
+; BE-NEXT: vmr v2, v3
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v31
+; BE-NEXT: std r3, 136(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 128(r1)
+; BE-NEXT: addi r3, r1, 128
+; BE-NEXT: lxvd2x v2, 0, r3
+; BE-NEXT: li r3, 144
+; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: addi r1, r1, 160
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: lrint_v2f128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -80(r1)
+; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 96(r1)
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: vmr v31, v3
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v31
+; CHECK-NEXT: mtvsrd v30, r3
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: xxmrghd v2, vs0, v30
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: addi r1, r1, 80
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+;
+; FAST-LABEL: lrint_v2f128:
+; FAST: # %bb.0:
+; FAST-NEXT: mflr r0
+; FAST-NEXT: stdu r1, -80(r1)
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: std r0, 96(r1)
+; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: vmr v31, v3
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v31
+; FAST-NEXT: mtvsrd v30, r3
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: xxmrghd v2, vs0, v30
+; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: addi r1, r1, 80
+; FAST-NEXT: ld r0, 16(r1)
+; FAST-NEXT: mtlr r0
+; FAST-NEXT: blr
+ %a = call <2 x i64> @llvm.lrint.v2i64.v2f128(<2 x fp128> %x)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.lrint.v2i64.v2f128(<2 x fp128>)
+
+define <4 x i64> @lrint_v4f128(<4 x fp128> %x) nounwind {
+; BE-LABEL: lrint_v4f128:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -208(r1)
+; BE-NEXT: li r3, 160
+; BE-NEXT: std r0, 224(r1)
+; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 176
+; BE-NEXT: vmr v29, v2
+; BE-NEXT: vmr v2, v3
+; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 192
+; BE-NEXT: vmr v30, v4
+; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: vmr v31, v5
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v29
+; BE-NEXT: std r3, 136(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v31
+; BE-NEXT: std r3, 128(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v30
+; BE-NEXT: std r3, 152(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 144(r1)
+; BE-NEXT: addi r3, r1, 128
+; BE-NEXT: lxvd2x v2, 0, r3
+; BE-NEXT: addi r3, r1, 144
+; BE-NEXT: lxvd2x v3, 0, r3
+; BE-NEXT: li r3, 192
+; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 176
+; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 160
+; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: addi r1, r1, 208
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: lrint_v4f128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -112(r1)
+; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 128(r1)
+; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 80
+; CHECK-NEXT: vmr v29, v3
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 96
+; CHECK-NEXT: vmr v30, v4
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: vmr v31, v5
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v29
+; CHECK-NEXT: mtvsrd v28, r3
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: vmr v2, v30
+; CHECK-NEXT: xxmrghd v29, vs0, v28
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v31
+; CHECK-NEXT: mtvsrd v30, r3
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 96
+; CHECK-NEXT: vmr v2, v29
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 80
+; CHECK-NEXT: xxmrghd v3, vs0, v30
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: addi r1, r1, 112
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+;
+; FAST-LABEL: lrint_v4f128:
+; FAST: # %bb.0:
+; FAST-NEXT: mflr r0
+; FAST-NEXT: stdu r1, -112(r1)
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: std r0, 128(r1)
+; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 80
+; FAST-NEXT: vmr v29, v3
+; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 96
+; FAST-NEXT: vmr v30, v4
+; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: vmr v31, v5
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v29
+; FAST-NEXT: mtvsrd v28, r3
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: vmr v2, v30
+; FAST-NEXT: xxmrghd v29, vs0, v28
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v31
+; FAST-NEXT: mtvsrd v30, r3
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 96
+; FAST-NEXT: vmr v2, v29
+; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 80
+; FAST-NEXT: xxmrghd v3, vs0, v30
+; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: addi r1, r1, 112
+; FAST-NEXT: ld r0, 16(r1)
+; FAST-NEXT: mtlr r0
+; FAST-NEXT: blr
+ %a = call <4 x i64> @llvm.lrint.v4i64.v4f128(<4 x fp128> %x)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.lrint.v4i64.v4f128(<4 x fp128>)
+
+define <8 x i64> @lrint_v8f128(<8 x fp128> %x) nounwind {
+; BE-LABEL: lrint_v8f128:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -304(r1)
+; BE-NEXT: li r3, 192
+; BE-NEXT: std r0, 320(r1)
+; BE-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 208
+; BE-NEXT: vmr v25, v2
+; BE-NEXT: vmr v2, v3
+; BE-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 224
+; BE-NEXT: vmr v26, v4
+; BE-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 240
+; BE-NEXT: vmr v27, v5
+; BE-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 256
+; BE-NEXT: vmr v28, v6
+; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 272
+; BE-NEXT: vmr v29, v7
+; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 288
+; BE-NEXT: vmr v30, v8
+; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: vmr v31, v9
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v25
+; BE-NEXT: std r3, 136(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v27
+; BE-NEXT: std r3, 128(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v26
+; BE-NEXT: std r3, 152(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v29
+; BE-NEXT: std r3, 144(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v28
+; BE-NEXT: std r3, 168(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v31
+; BE-NEXT: std r3, 160(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v30
+; BE-NEXT: std r3, 184(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 176(r1)
+; BE-NEXT: addi r3, r1, 128
+; BE-NEXT: lxvd2x v2, 0, r3
+; BE-NEXT: addi r3, r1, 144
+; BE-NEXT: lxvd2x v3, 0, r3
+; BE-NEXT: addi r3, r1, 160
+; BE-NEXT: lxvd2x v4, 0, r3
+; BE-NEXT: addi r3, r1, 176
+; BE-NEXT: lxvd2x v5, 0, r3
+; BE-NEXT: li r3, 288
+; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 272
+; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 256
+; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 240
+; BE-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 224
+; BE-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 208
+; BE-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 192
+; BE-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: addi r1, r1, 304
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: lrint_v8f128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -176(r1)
+; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: std r0, 192(r1)
+; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 80
+; CHECK-NEXT: vmr v25, v3
+; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 96
+; CHECK-NEXT: vmr v26, v4
+; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 112
+; CHECK-NEXT: vmr v27, v5
+; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 128
+; CHECK-NEXT: vmr v28, v6
+; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 144
+; CHECK-NEXT: vmr v29, v7
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 160
+; CHECK-NEXT: vmr v30, v8
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: vmr v31, v9
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v25
+; CHECK-NEXT: mtvsrd v24, r3
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: vmr v2, v26
+; CHECK-NEXT: xxmrghd v25, vs0, v24
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v27
+; CHECK-NEXT: mtvsrd v26, r3
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: vmr v2, v28
+; CHECK-NEXT: xxmrghd v27, vs0, v26
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v29
+; CHECK-NEXT: mtvsrd v28, r3
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: vmr v2, v30
+; CHECK-NEXT: xxmrghd v29, vs0, v28
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v31
+; CHECK-NEXT: mtvsrd v30, r3
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 160
+; CHECK-NEXT: vmr v4, v29
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 144
+; CHECK-NEXT: vmr v3, v27
+; CHECK-NEXT: vmr v2, v25
+; CHECK-NEXT: xxmrghd v5, vs0, v30
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 128
+; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 112
+; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 96
+; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 80
+; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: addi r1, r1, 176
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+;
+; FAST-LABEL: lrint_v8f128:
+; FAST: # %bb.0:
+; FAST-NEXT: mflr r0
+; FAST-NEXT: stdu r1, -176(r1)
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: std r0, 192(r1)
+; FAST-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 80
+; FAST-NEXT: vmr v25, v3
+; FAST-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 96
+; FAST-NEXT: vmr v26, v4
+; FAST-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 112
+; FAST-NEXT: vmr v27, v5
+; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 128
+; FAST-NEXT: vmr v28, v6
+; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 144
+; FAST-NEXT: vmr v29, v7
+; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 160
+; FAST-NEXT: vmr v30, v8
+; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: vmr v31, v9
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v25
+; FAST-NEXT: mtvsrd v24, r3
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: vmr v2, v26
+; FAST-NEXT: xxmrghd v25, vs0, v24
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v27
+; FAST-NEXT: mtvsrd v26, r3
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: vmr v2, v28
+; FAST-NEXT: xxmrghd v27, vs0, v26
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v29
+; FAST-NEXT: mtvsrd v28, r3
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: vmr v2, v30
+; FAST-NEXT: xxmrghd v29, vs0, v28
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v31
+; FAST-NEXT: mtvsrd v30, r3
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 160
+; FAST-NEXT: vmr v4, v29
+; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 144
+; FAST-NEXT: vmr v3, v27
+; FAST-NEXT: vmr v2, v25
+; FAST-NEXT: xxmrghd v5, vs0, v30
+; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 128
+; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 112
+; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 96
+; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 80
+; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: addi r1, r1, 176
+; FAST-NEXT: ld r0, 16(r1)
+; FAST-NEXT: mtlr r0
+; FAST-NEXT: blr
+ %a = call <8 x i64> @llvm.lrint.v8i64.v8f128(<8 x fp128> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.lrint.v8i64.v8f128(<8 x fp128>)
+
+; @llvm.lrint.v16i64.v16f128 has no vector lowering, so it is scalarized into
+; sixteen lrintf128 libcalls in every run configuration below (BE, CHECK, FAST).
+; BE funnels each libcall result through stack stores (std r3, N(r1)) and then
+; reloads the vectors with lxvd2x; the little-endian configurations rebuild each
+; result pair in registers via mtvsrd/mtfprd plus xxmrghd.
+; NOTE(review): the CHECK lines appear autogenerated (update_llc_test_checks
+; style) -- regenerate them rather than hand-editing if codegen changes.
+define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) nounwind {
+; BE-LABEL: lrint_v16i64_v16f128:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -496(r1)
+; BE-NEXT: li r3, 304
+; BE-NEXT: std r0, 512(r1)
+; BE-NEXT: stxvd2x v20, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 320
+; BE-NEXT: stxvd2x v21, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 336
+; BE-NEXT: vmr v21, v2
+; BE-NEXT: vmr v2, v3
+; BE-NEXT: stxvd2x v22, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 352
+; BE-NEXT: vmr v22, v4
+; BE-NEXT: stxvd2x v23, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 368
+; BE-NEXT: vmr v23, v5
+; BE-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 384
+; BE-NEXT: vmr v24, v6
+; BE-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 400
+; BE-NEXT: vmr v25, v7
+; BE-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 416
+; BE-NEXT: vmr v26, v8
+; BE-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 432
+; BE-NEXT: vmr v27, v9
+; BE-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 448
+; BE-NEXT: vmr v28, v11
+; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 464
+; BE-NEXT: vmr v29, v10
+; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 480
+; BE-NEXT: vmr v30, v13
+; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: li r3, 128
+; BE-NEXT: stxvd2x v12, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: addi r3, r1, 768
+; BE-NEXT: lxvw4x vs0, 0, r3
+; BE-NEXT: li r3, 160
+; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: addi r3, r1, 784
+; BE-NEXT: lxvw4x vs0, 0, r3
+; BE-NEXT: li r3, 144
+; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: addi r3, r1, 736
+; BE-NEXT: lxvw4x v20, 0, r3
+; BE-NEXT: addi r3, r1, 752
+; BE-NEXT: lxvw4x v31, 0, r3
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v21
+; BE-NEXT: std r3, 184(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v23
+; BE-NEXT: std r3, 176(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v22
+; BE-NEXT: std r3, 200(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v25
+; BE-NEXT: std r3, 192(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v24
+; BE-NEXT: std r3, 216(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v27
+; BE-NEXT: std r3, 208(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v26
+; BE-NEXT: std r3, 232(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v28
+; BE-NEXT: std r3, 224(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v29
+; BE-NEXT: std r3, 248(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v30
+; BE-NEXT: std r3, 240(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 264(r1)
+; BE-NEXT: li r3, 128
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v31
+; BE-NEXT: std r3, 256(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v20
+; BE-NEXT: std r3, 280(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 272(r1)
+; BE-NEXT: li r3, 144
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 296(r1)
+; BE-NEXT: li r3, 160
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 288(r1)
+; BE-NEXT: addi r3, r1, 176
+; BE-NEXT: lxvd2x v2, 0, r3
+; BE-NEXT: addi r3, r1, 192
+; BE-NEXT: lxvd2x v3, 0, r3
+; BE-NEXT: addi r3, r1, 208
+; BE-NEXT: lxvd2x v4, 0, r3
+; BE-NEXT: addi r3, r1, 224
+; BE-NEXT: lxvd2x v5, 0, r3
+; BE-NEXT: addi r3, r1, 240
+; BE-NEXT: lxvd2x v6, 0, r3
+; BE-NEXT: addi r3, r1, 256
+; BE-NEXT: lxvd2x v7, 0, r3
+; BE-NEXT: addi r3, r1, 272
+; BE-NEXT: lxvd2x v8, 0, r3
+; BE-NEXT: addi r3, r1, 288
+; BE-NEXT: lxvd2x v9, 0, r3
+; BE-NEXT: li r3, 480
+; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 464
+; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 448
+; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 432
+; BE-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 416
+; BE-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 400
+; BE-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 384
+; BE-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 368
+; BE-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 352
+; BE-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 336
+; BE-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 320
+; BE-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 304
+; BE-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: addi r1, r1, 496
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: lrint_v16i64_v16f128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -304(r1)
+; CHECK-NEXT: li r3, 112
+; CHECK-NEXT: std r0, 320(r1)
+; CHECK-NEXT: stxvd2x v20, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 128
+; CHECK-NEXT: stxvd2x v21, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 144
+; CHECK-NEXT: vmr v21, v4
+; CHECK-NEXT: stxvd2x v22, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 160
+; CHECK-NEXT: vmr v22, v6
+; CHECK-NEXT: stxvd2x v23, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 176
+; CHECK-NEXT: vmr v23, v8
+; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 192
+; CHECK-NEXT: vmr v24, v9
+; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 208
+; CHECK-NEXT: vmr v25, v7
+; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 224
+; CHECK-NEXT: vmr v26, v10
+; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 240
+; CHECK-NEXT: vmr v27, v5
+; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 256
+; CHECK-NEXT: vmr v28, v11
+; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 272
+; CHECK-NEXT: vmr v29, v12
+; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 288
+; CHECK-NEXT: vmr v30, v3
+; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: stxvd2x v13, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: addi r3, r1, 576
+; CHECK-NEXT: lxvd2x vs0, 0, r3
+; CHECK-NEXT: addi r3, r1, 560
+; CHECK-NEXT: lxvd2x vs1, 0, r3
+; CHECK-NEXT: addi r3, r1, 544
+; CHECK-NEXT: lxvd2x vs2, 0, r3
+; CHECK-NEXT: li r3, 96
+; CHECK-NEXT: xxswapd vs0, vs0
+; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 80
+; CHECK-NEXT: xxswapd vs0, vs1
+; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: xxswapd vs0, vs2
+; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: addi r3, r1, 528
+; CHECK-NEXT: lxvd2x vs0, 0, r3
+; CHECK-NEXT: xxswapd v31, vs0
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v30
+; CHECK-NEXT: mtvsrd v20, r3
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: vmr v2, v21
+; CHECK-NEXT: xxmrghd v30, vs0, v20
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v27
+; CHECK-NEXT: mtvsrd v21, r3
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: vmr v2, v22
+; CHECK-NEXT: xxmrghd v27, vs0, v21
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v25
+; CHECK-NEXT: mtvsrd v22, r3
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: vmr v2, v23
+; CHECK-NEXT: xxmrghd v25, vs0, v22
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v24
+; CHECK-NEXT: mtvsrd v23, r3
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: vmr v2, v26
+; CHECK-NEXT: xxmrghd v24, vs0, v23
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v28
+; CHECK-NEXT: mtvsrd v26, r3
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: vmr v2, v29
+; CHECK-NEXT: xxmrghd v28, vs0, v26
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v29, r3
+; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: vmr v2, v31
+; CHECK-NEXT: xxmrghd v29, vs0, v29
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v31, r3
+; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 80
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: xxmrghd v31, vs0, v31
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v26, r3
+; CHECK-NEXT: li r3, 96
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 288
+; CHECK-NEXT: vmr v8, v31
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 272
+; CHECK-NEXT: vmr v2, v30
+; CHECK-NEXT: vmr v7, v29
+; CHECK-NEXT: vmr v6, v28
+; CHECK-NEXT: vmr v3, v27
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 256
+; CHECK-NEXT: vmr v4, v25
+; CHECK-NEXT: vmr v5, v24
+; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 240
+; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 224
+; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 208
+; CHECK-NEXT: xxmrghd v9, vs0, v26
+; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 192
+; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 176
+; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 160
+; CHECK-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 144
+; CHECK-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 128
+; CHECK-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 112
+; CHECK-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: addi r1, r1, 304
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+;
+; FAST-LABEL: lrint_v16i64_v16f128:
+; FAST: # %bb.0:
+; FAST-NEXT: mflr r0
+; FAST-NEXT: stdu r1, -304(r1)
+; FAST-NEXT: li r3, 112
+; FAST-NEXT: std r0, 320(r1)
+; FAST-NEXT: stxvd2x v20, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 128
+; FAST-NEXT: stxvd2x v21, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 144
+; FAST-NEXT: vmr v21, v4
+; FAST-NEXT: stxvd2x v22, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 160
+; FAST-NEXT: vmr v22, v6
+; FAST-NEXT: stxvd2x v23, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 176
+; FAST-NEXT: vmr v23, v8
+; FAST-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 192
+; FAST-NEXT: vmr v24, v9
+; FAST-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 208
+; FAST-NEXT: vmr v25, v7
+; FAST-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 224
+; FAST-NEXT: vmr v26, v10
+; FAST-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 240
+; FAST-NEXT: vmr v27, v5
+; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 256
+; FAST-NEXT: vmr v28, v11
+; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 272
+; FAST-NEXT: vmr v29, v12
+; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 288
+; FAST-NEXT: vmr v30, v3
+; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: stxvd2x v13, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: addi r3, r1, 576
+; FAST-NEXT: lxvd2x vs0, 0, r3
+; FAST-NEXT: addi r3, r1, 560
+; FAST-NEXT: lxvd2x vs1, 0, r3
+; FAST-NEXT: addi r3, r1, 544
+; FAST-NEXT: lxvd2x vs2, 0, r3
+; FAST-NEXT: li r3, 96
+; FAST-NEXT: xxswapd vs0, vs0
+; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 80
+; FAST-NEXT: xxswapd vs0, vs1
+; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: xxswapd vs0, vs2
+; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: addi r3, r1, 528
+; FAST-NEXT: lxvd2x vs0, 0, r3
+; FAST-NEXT: xxswapd v31, vs0
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v30
+; FAST-NEXT: mtvsrd v20, r3
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: vmr v2, v21
+; FAST-NEXT: xxmrghd v30, vs0, v20
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v27
+; FAST-NEXT: mtvsrd v21, r3
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: vmr v2, v22
+; FAST-NEXT: xxmrghd v27, vs0, v21
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v25
+; FAST-NEXT: mtvsrd v22, r3
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: vmr v2, v23
+; FAST-NEXT: xxmrghd v25, vs0, v22
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v24
+; FAST-NEXT: mtvsrd v23, r3
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: vmr v2, v26
+; FAST-NEXT: xxmrghd v24, vs0, v23
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v28
+; FAST-NEXT: mtvsrd v26, r3
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: vmr v2, v29
+; FAST-NEXT: xxmrghd v28, vs0, v26
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtvsrd v29, r3
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: vmr v2, v31
+; FAST-NEXT: xxmrghd v29, vs0, v29
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 80
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: xxmrghd v31, vs0, v31
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtvsrd v26, r3
+; FAST-NEXT: li r3, 96
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 288
+; FAST-NEXT: vmr v8, v31
+; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 272
+; FAST-NEXT: vmr v2, v30
+; FAST-NEXT: vmr v7, v29
+; FAST-NEXT: vmr v6, v28
+; FAST-NEXT: vmr v3, v27
+; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 256
+; FAST-NEXT: vmr v4, v25
+; FAST-NEXT: vmr v5, v24
+; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 240
+; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 224
+; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 208
+; FAST-NEXT: xxmrghd v9, vs0, v26
+; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 192
+; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 176
+; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 160
+; FAST-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 144
+; FAST-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 128
+; FAST-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 112
+; FAST-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: addi r1, r1, 304
+; FAST-NEXT: ld r0, 16(r1)
+; FAST-NEXT: mtlr r0
+; FAST-NEXT: blr
+  ; All sixteen fp128 lanes are rounded through the lrintf128 calls checked
+  ; above; there is no vector form of the libcall.
+  %a = call <16 x i64> @llvm.lrint.v16i64.v16f128(<16 x fp128> %x)
+  ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.lrint.v16i64.v16f128(<16 x fp128>)
+
+define <32 x i64> @lrint_v32i64_v32f128(<32 x fp128> %x) nounwind {
+; BE-LABEL: lrint_v32i64_v32f128:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -896(r1)
+; BE-NEXT: std r0, 912(r1)
+; BE-NEXT: std r30, 880(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r3
+; BE-NEXT: addi r3, r1, 1440
+; BE-NEXT: li r4, 688
+; BE-NEXT: lxvw4x vs0, 0, r3
+; BE-NEXT: li r3, 256
+; BE-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 704
+; BE-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 720
+; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: addi r3, r1, 1456
+; BE-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 736
+; BE-NEXT: lxvw4x vs0, 0, r3
+; BE-NEXT: li r3, 240
+; BE-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 752
+; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: addi r3, r1, 1408
+; BE-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 768
+; BE-NEXT: lxvw4x vs0, 0, r3
+; BE-NEXT: li r3, 224
+; BE-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 784
+; BE-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 800
+; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: addi r3, r1, 1424
+; BE-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 816
+; BE-NEXT: lxvw4x vs0, 0, r3
+; BE-NEXT: li r3, 208
+; BE-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 832
+; BE-NEXT: vmr v28, v2
+; BE-NEXT: vmr v2, v3
+; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: addi r3, r1, 1376
+; BE-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 848
+; BE-NEXT: lxvw4x vs0, 0, r3
+; BE-NEXT: li r3, 192
+; BE-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 864
+; BE-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 400
+; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: addi r3, r1, 1392
+; BE-NEXT: stxvd2x v13, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 416
+; BE-NEXT: lxvw4x vs0, 0, r3
+; BE-NEXT: li r3, 176
+; BE-NEXT: stxvd2x v12, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 368
+; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: addi r3, r1, 1344
+; BE-NEXT: stxvd2x v11, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 384
+; BE-NEXT: lxvw4x vs0, 0, r3
+; BE-NEXT: li r3, 160
+; BE-NEXT: stxvd2x v10, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 336
+; BE-NEXT: stxvd2x v9, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 352
+; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: addi r3, r1, 1360
+; BE-NEXT: stxvd2x v8, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 304
+; BE-NEXT: lxvw4x vs0, 0, r3
+; BE-NEXT: li r3, 144
+; BE-NEXT: stxvd2x v7, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 320
+; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: addi r3, r1, 1312
+; BE-NEXT: stxvd2x v6, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 272
+; BE-NEXT: lxvw4x vs0, 0, r3
+; BE-NEXT: li r3, 128
+; BE-NEXT: stxvd2x v5, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: li r4, 288
+; BE-NEXT: stxvd2x v4, r1, r4 # 16-byte Folded Spill
+; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT: addi r3, r1, 1328
+; BE-NEXT: lxvw4x v23, 0, r3
+; BE-NEXT: addi r3, r1, 1280
+; BE-NEXT: lxvw4x v22, 0, r3
+; BE-NEXT: addi r3, r1, 1296
+; BE-NEXT: lxvw4x v21, 0, r3
+; BE-NEXT: addi r3, r1, 1248
+; BE-NEXT: lxvw4x v20, 0, r3
+; BE-NEXT: addi r3, r1, 1264
+; BE-NEXT: lxvw4x v31, 0, r3
+; BE-NEXT: addi r3, r1, 1216
+; BE-NEXT: lxvw4x v30, 0, r3
+; BE-NEXT: addi r3, r1, 1232
+; BE-NEXT: lxvw4x v29, 0, r3
+; BE-NEXT: addi r3, r1, 1184
+; BE-NEXT: lxvw4x v27, 0, r3
+; BE-NEXT: addi r3, r1, 1200
+; BE-NEXT: lxvw4x v26, 0, r3
+; BE-NEXT: addi r3, r1, 1152
+; BE-NEXT: lxvw4x v25, 0, r3
+; BE-NEXT: addi r3, r1, 1168
+; BE-NEXT: lxvw4x v24, 0, r3
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v28
+; BE-NEXT: std r3, 440(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v24
+; BE-NEXT: std r3, 432(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v25
+; BE-NEXT: std r3, 536(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v26
+; BE-NEXT: std r3, 528(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v27
+; BE-NEXT: std r3, 552(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v29
+; BE-NEXT: std r3, 544(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v30
+; BE-NEXT: std r3, 568(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v31
+; BE-NEXT: std r3, 560(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v20
+; BE-NEXT: std r3, 584(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v21
+; BE-NEXT: std r3, 576(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v22
+; BE-NEXT: std r3, 600(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: vmr v2, v23
+; BE-NEXT: std r3, 592(r1)
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 616(r1)
+; BE-NEXT: li r3, 128
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 608(r1)
+; BE-NEXT: li r3, 144
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 632(r1)
+; BE-NEXT: li r3, 160
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 624(r1)
+; BE-NEXT: li r3, 176
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 648(r1)
+; BE-NEXT: li r3, 192
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 640(r1)
+; BE-NEXT: li r3, 208
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 664(r1)
+; BE-NEXT: li r3, 224
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 656(r1)
+; BE-NEXT: li r3, 240
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 680(r1)
+; BE-NEXT: li r3, 256
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 672(r1)
+; BE-NEXT: li r3, 272
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 456(r1)
+; BE-NEXT: li r3, 288
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 448(r1)
+; BE-NEXT: li r3, 304
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 472(r1)
+; BE-NEXT: li r3, 320
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 464(r1)
+; BE-NEXT: li r3, 336
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 488(r1)
+; BE-NEXT: li r3, 352
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 480(r1)
+; BE-NEXT: li r3, 368
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 504(r1)
+; BE-NEXT: li r3, 384
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 496(r1)
+; BE-NEXT: li r3, 400
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 520(r1)
+; BE-NEXT: li r3, 416
+; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: bl lrintf128
+; BE-NEXT: nop
+; BE-NEXT: std r3, 512(r1)
+; BE-NEXT: addi r3, r1, 432
+; BE-NEXT: lxvd2x vs0, 0, r3
+; BE-NEXT: addi r3, r1, 528
+; BE-NEXT: lxvd2x vs1, 0, r3
+; BE-NEXT: addi r3, r1, 544
+; BE-NEXT: lxvd2x vs2, 0, r3
+; BE-NEXT: addi r3, r1, 560
+; BE-NEXT: lxvd2x vs3, 0, r3
+; BE-NEXT: addi r3, r1, 576
+; BE-NEXT: lxvd2x vs4, 0, r3
+; BE-NEXT: addi r3, r1, 592
+; BE-NEXT: lxvd2x vs5, 0, r3
+; BE-NEXT: addi r3, r1, 608
+; BE-NEXT: lxvd2x vs6, 0, r3
+; BE-NEXT: addi r3, r1, 624
+; BE-NEXT: lxvd2x vs7, 0, r3
+; BE-NEXT: addi r3, r1, 640
+; BE-NEXT: lxvd2x vs8, 0, r3
+; BE-NEXT: addi r3, r1, 656
+; BE-NEXT: lxvd2x vs9, 0, r3
+; BE-NEXT: addi r3, r1, 672
+; BE-NEXT: lxvd2x vs10, 0, r3
+; BE-NEXT: addi r3, r1, 448
+; BE-NEXT: lxvd2x vs11, 0, r3
+; BE-NEXT: addi r3, r1, 464
+; BE-NEXT: lxvd2x vs12, 0, r3
+; BE-NEXT: addi r3, r1, 480
+; BE-NEXT: lxvd2x vs13, 0, r3
+; BE-NEXT: addi r3, r1, 496
+; BE-NEXT: lxvd2x v2, 0, r3
+; BE-NEXT: addi r3, r1, 512
+; BE-NEXT: lxvd2x v3, 0, r3
+; BE-NEXT: li r3, 80
+; BE-NEXT: stxvd2x v3, r30, r3
+; BE-NEXT: li r3, 64
+; BE-NEXT: stxvd2x v2, r30, r3
+; BE-NEXT: li r3, 48
+; BE-NEXT: stxvd2x vs13, r30, r3
+; BE-NEXT: li r3, 32
+; BE-NEXT: stxvd2x vs12, r30, r3
+; BE-NEXT: li r3, 16
+; BE-NEXT: stxvd2x vs11, r30, r3
+; BE-NEXT: li r3, 240
+; BE-NEXT: stxvd2x vs10, r30, r3
+; BE-NEXT: li r3, 224
+; BE-NEXT: stxvd2x vs9, r30, r3
+; BE-NEXT: li r3, 208
+; BE-NEXT: stxvd2x vs8, r30, r3
+; BE-NEXT: li r3, 192
+; BE-NEXT: stxvd2x vs7, r30, r3
+; BE-NEXT: li r3, 176
+; BE-NEXT: stxvd2x vs6, r30, r3
+; BE-NEXT: li r3, 160
+; BE-NEXT: stxvd2x vs5, r30, r3
+; BE-NEXT: li r3, 144
+; BE-NEXT: stxvd2x vs4, r30, r3
+; BE-NEXT: li r3, 128
+; BE-NEXT: stxvd2x vs3, r30, r3
+; BE-NEXT: li r3, 112
+; BE-NEXT: stxvd2x vs2, r30, r3
+; BE-NEXT: li r3, 96
+; BE-NEXT: stxvd2x vs1, r30, r3
+; BE-NEXT: li r3, 864
+; BE-NEXT: stxvd2x vs0, 0, r30
+; BE-NEXT: ld r30, 880(r1) # 8-byte Folded Reload
+; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 848
+; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 832
+; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 816
+; BE-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 800
+; BE-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 784
+; BE-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 768
+; BE-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 752
+; BE-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 736
+; BE-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 720
+; BE-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 704
+; BE-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: li r3, 688
+; BE-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload
+; BE-NEXT: addi r1, r1, 896
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: lrint_v32i64_v32f128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -640(r1)
+; CHECK-NEXT: li r4, 432
+; CHECK-NEXT: std r0, 656(r1)
+; CHECK-NEXT: std r30, 624(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r30, r3
+; CHECK-NEXT: addi r3, r1, 1184
+; CHECK-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 448
+; CHECK-NEXT: lxvd2x vs0, 0, r3
+; CHECK-NEXT: addi r3, r1, 1168
+; CHECK-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 464
+; CHECK-NEXT: lxvd2x vs1, 0, r3
+; CHECK-NEXT: addi r3, r1, 1152
+; CHECK-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 480
+; CHECK-NEXT: lxvd2x vs2, 0, r3
+; CHECK-NEXT: addi r3, r1, 1136
+; CHECK-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 496
+; CHECK-NEXT: lxvd2x vs3, 0, r3
+; CHECK-NEXT: addi r3, r1, 1120
+; CHECK-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 512
+; CHECK-NEXT: lxvd2x vs4, 0, r3
+; CHECK-NEXT: addi r3, r1, 1104
+; CHECK-NEXT: vmr v24, v3
+; CHECK-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 528
+; CHECK-NEXT: lxvd2x vs5, 0, r3
+; CHECK-NEXT: addi r3, r1, 1088
+; CHECK-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 544
+; CHECK-NEXT: xxswapd vs0, vs0
+; CHECK-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 560
+; CHECK-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 576
+; CHECK-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 592
+; CHECK-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 608
+; CHECK-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 416
+; CHECK-NEXT: stxvd2x v13, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 400
+; CHECK-NEXT: stxvd2x v12, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 384
+; CHECK-NEXT: stxvd2x v11, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 352
+; CHECK-NEXT: stxvd2x v10, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 336
+; CHECK-NEXT: stxvd2x v9, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 304
+; CHECK-NEXT: stxvd2x v8, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 288
+; CHECK-NEXT: stxvd2x v7, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 256
+; CHECK-NEXT: stxvd2x v6, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 240
+; CHECK-NEXT: stxvd2x v5, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 224
+; CHECK-NEXT: stxvd2x v4, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 192
+; CHECK-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 176
+; CHECK-NEXT: xxswapd vs0, vs1
+; CHECK-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 160
+; CHECK-NEXT: xxswapd vs0, vs2
+; CHECK-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 144
+; CHECK-NEXT: xxswapd vs0, vs3
+; CHECK-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 128
+; CHECK-NEXT: xxswapd vs0, vs4
+; CHECK-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: li r4, 112
+; CHECK-NEXT: xxswapd vs0, vs5
+; CHECK-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT: lxvd2x vs0, 0, r3
+; CHECK-NEXT: li r3, 96
+; CHECK-NEXT: xxswapd vs0, vs0
+; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: addi r3, r1, 1072
+; CHECK-NEXT: lxvd2x vs0, 0, r3
+; CHECK-NEXT: li r3, 80
+; CHECK-NEXT: xxswapd vs0, vs0
+; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: addi r3, r1, 1056
+; CHECK-NEXT: lxvd2x vs0, 0, r3
+; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: xxswapd vs0, vs0
+; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: addi r3, r1, 1040
+; CHECK-NEXT: lxvd2x vs0, 0, r3
+; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: xxswapd vs0, vs0
+; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: addi r3, r1, 1024
+; CHECK-NEXT: lxvd2x vs0, 0, r3
+; CHECK-NEXT: addi r3, r1, 1008
+; CHECK-NEXT: xxswapd v22, vs0
+; CHECK-NEXT: lxvd2x vs0, 0, r3
+; CHECK-NEXT: addi r3, r1, 992
+; CHECK-NEXT: xxswapd v21, vs0
+; CHECK-NEXT: lxvd2x vs0, 0, r3
+; CHECK-NEXT: addi r3, r1, 976
+; CHECK-NEXT: xxswapd v20, vs0
+; CHECK-NEXT: lxvd2x vs0, 0, r3
+; CHECK-NEXT: addi r3, r1, 960
+; CHECK-NEXT: xxswapd v31, vs0
+; CHECK-NEXT: lxvd2x vs0, 0, r3
+; CHECK-NEXT: addi r3, r1, 944
+; CHECK-NEXT: xxswapd v30, vs0
+; CHECK-NEXT: lxvd2x vs0, 0, r3
+; CHECK-NEXT: addi r3, r1, 928
+; CHECK-NEXT: xxswapd v29, vs0
+; CHECK-NEXT: lxvd2x vs0, 0, r3
+; CHECK-NEXT: addi r3, r1, 912
+; CHECK-NEXT: xxswapd v28, vs0
+; CHECK-NEXT: lxvd2x vs0, 0, r3
+; CHECK-NEXT: addi r3, r1, 896
+; CHECK-NEXT: xxswapd v27, vs0
+; CHECK-NEXT: lxvd2x vs0, 0, r3
+; CHECK-NEXT: addi r3, r1, 880
+; CHECK-NEXT: xxswapd v26, vs0
+; CHECK-NEXT: lxvd2x vs0, 0, r3
+; CHECK-NEXT: xxswapd v25, vs0
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v24
+; CHECK-NEXT: mtvsrd v23, r3
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 368
+; CHECK-NEXT: vmr v2, v25
+; CHECK-NEXT: xxmrghd vs0, vs0, v23
+; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v26
+; CHECK-NEXT: mtvsrd v25, r3
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 320
+; CHECK-NEXT: vmr v2, v27
+; CHECK-NEXT: xxmrghd vs0, vs0, v25
+; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v28
+; CHECK-NEXT: mtvsrd v27, r3
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 272
+; CHECK-NEXT: vmr v2, v29
+; CHECK-NEXT: xxmrghd vs0, vs0, v27
+; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v30
+; CHECK-NEXT: mtvsrd v29, r3
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 208
+; CHECK-NEXT: vmr v2, v31
+; CHECK-NEXT: xxmrghd vs0, vs0, v29
+; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v20
+; CHECK-NEXT: mtvsrd v31, r3
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: vmr v2, v21
+; CHECK-NEXT: xxmrghd v31, vs0, v31
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: vmr v2, v22
+; CHECK-NEXT: mtvsrd v29, r3
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: xxmrghd v29, vs0, v29
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v27, r3
+; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 80
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: xxmrghd v27, vs0, v27
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v25, r3
+; CHECK-NEXT: li r3, 96
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 112
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: xxmrghd v25, vs0, v25
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v23, r3
+; CHECK-NEXT: li r3, 128
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 144
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: xxmrghd v23, vs0, v23
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v22, r3
+; CHECK-NEXT: li r3, 160
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 176
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: xxmrghd v22, vs0, v22
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v21, r3
+; CHECK-NEXT: li r3, 192
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 224
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: xxmrghd v21, vs0, v21
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v20, r3
+; CHECK-NEXT: li r3, 240
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 256
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: xxmrghd v20, vs0, v20
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v24, r3
+; CHECK-NEXT: li r3, 288
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 304
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: xxmrghd v24, vs0, v24
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v26, r3
+; CHECK-NEXT: li r3, 336
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 352
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: xxmrghd v26, vs0, v26
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v28, r3
+; CHECK-NEXT: li r3, 384
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 400
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: xxmrghd v28, vs0, v28
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v30, r3
+; CHECK-NEXT: li r3, 416
+; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: bl lrintf128
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 80
+; CHECK-NEXT: xxswapd vs1, v28
+; CHECK-NEXT: li r4, 208
+; CHECK-NEXT: xxswapd vs2, v26
+; CHECK-NEXT: xxswapd vs3, v27
+; CHECK-NEXT: xxmrghd v2, vs0, v30
+; CHECK-NEXT: xxswapd vs0, v2
+; CHECK-NEXT: stxvd2x vs0, r30, r3
+; CHECK-NEXT: li r3, 64
+; CHECK-NEXT: stxvd2x vs1, r30, r3
+; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: stxvd2x vs2, r30, r3
+; CHECK-NEXT: li r3, 32
+; CHECK-NEXT: xxswapd vs0, v24
+; CHECK-NEXT: stxvd2x vs0, r30, r3
+; CHECK-NEXT: li r3, 16
+; CHECK-NEXT: xxswapd vs1, v20
+; CHECK-NEXT: stxvd2x vs1, r30, r3
+; CHECK-NEXT: li r3, 240
+; CHECK-NEXT: xxswapd vs2, v23
+; CHECK-NEXT: xxswapd vs0, v21
+; CHECK-NEXT: stxvd2x vs0, r30, r3
+; CHECK-NEXT: li r3, 224
+; CHECK-NEXT: xxswapd vs1, v22
+; CHECK-NEXT: stxvd2x vs1, r30, r3
+; CHECK-NEXT: li r3, 208
+; CHECK-NEXT: stxvd2x vs2, r30, r3
+; CHECK-NEXT: li r3, 192
+; CHECK-NEXT: xxswapd vs0, v25
+; CHECK-NEXT: stxvd2x vs0, r30, r3
+; CHECK-NEXT: li r3, 176
+; CHECK-NEXT: stxvd2x vs3, r30, r3
+; CHECK-NEXT: li r3, 160
+; CHECK-NEXT: lxvd2x vs2, r1, r4 # 16-byte Folded Reload
+; CHECK-NEXT: li r4, 272
+; CHECK-NEXT: xxswapd vs1, v29
+; CHECK-NEXT: stxvd2x vs1, r30, r3
+; CHECK-NEXT: li r3, 144
+; CHECK-NEXT: lxvd2x vs1, r1, r4 # 16-byte Folded Reload
+; CHECK-NEXT: li r4, 320
+; CHECK-NEXT: lxvd2x vs3, r1, r4 # 16-byte Folded Reload
+; CHECK-NEXT: li r4, 368
+; CHECK-NEXT: lxvd2x vs4, r1, r4 # 16-byte Folded Reload
+; CHECK-NEXT: xxswapd vs0, v31
+; CHECK-NEXT: stxvd2x vs0, r30, r3
+; CHECK-NEXT: li r3, 128
+; CHECK-NEXT: xxswapd vs2, vs2
+; CHECK-NEXT: stxvd2x vs2, r30, r3
+; CHECK-NEXT: li r3, 112
+; CHECK-NEXT: xxswapd vs1, vs1
+; CHECK-NEXT: stxvd2x vs1, r30, r3
+; CHECK-NEXT: li r3, 96
+; CHECK-NEXT: xxswapd vs3, vs3
+; CHECK-NEXT: stxvd2x vs3, r30, r3
+; CHECK-NEXT: li r3, 608
+; CHECK-NEXT: xxswapd vs4, vs4
+; CHECK-NEXT: stxvd2x vs4, 0, r30
+; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 592
+; CHECK-NEXT: ld r30, 624(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 576
+; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 560
+; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 544
+; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 528
+; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 512
+; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 496
+; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 480
+; CHECK-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 464
+; CHECK-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 448
+; CHECK-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: li r3, 432
+; CHECK-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT: addi r1, r1, 640
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+;
+; FAST-LABEL: lrint_v32i64_v32f128:
+; FAST: # %bb.0:
+; FAST-NEXT: mflr r0
+; FAST-NEXT: stdu r1, -640(r1)
+; FAST-NEXT: li r4, 432
+; FAST-NEXT: std r0, 656(r1)
+; FAST-NEXT: std r30, 624(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r30, r3
+; FAST-NEXT: addi r3, r1, 1184
+; FAST-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 448
+; FAST-NEXT: lxvd2x vs0, 0, r3
+; FAST-NEXT: addi r3, r1, 1168
+; FAST-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 464
+; FAST-NEXT: lxvd2x vs1, 0, r3
+; FAST-NEXT: addi r3, r1, 1152
+; FAST-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 480
+; FAST-NEXT: lxvd2x vs2, 0, r3
+; FAST-NEXT: addi r3, r1, 1136
+; FAST-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 496
+; FAST-NEXT: lxvd2x vs3, 0, r3
+; FAST-NEXT: addi r3, r1, 1120
+; FAST-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 512
+; FAST-NEXT: lxvd2x vs4, 0, r3
+; FAST-NEXT: addi r3, r1, 1104
+; FAST-NEXT: vmr v24, v3
+; FAST-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 528
+; FAST-NEXT: lxvd2x vs5, 0, r3
+; FAST-NEXT: addi r3, r1, 1088
+; FAST-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 544
+; FAST-NEXT: xxswapd vs0, vs0
+; FAST-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 560
+; FAST-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 576
+; FAST-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 592
+; FAST-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 608
+; FAST-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 416
+; FAST-NEXT: stxvd2x v13, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 400
+; FAST-NEXT: stxvd2x v12, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 384
+; FAST-NEXT: stxvd2x v11, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 352
+; FAST-NEXT: stxvd2x v10, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 336
+; FAST-NEXT: stxvd2x v9, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 304
+; FAST-NEXT: stxvd2x v8, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 288
+; FAST-NEXT: stxvd2x v7, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 256
+; FAST-NEXT: stxvd2x v6, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 240
+; FAST-NEXT: stxvd2x v5, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 224
+; FAST-NEXT: stxvd2x v4, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 192
+; FAST-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 176
+; FAST-NEXT: xxswapd vs0, vs1
+; FAST-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 160
+; FAST-NEXT: xxswapd vs0, vs2
+; FAST-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 144
+; FAST-NEXT: xxswapd vs0, vs3
+; FAST-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 128
+; FAST-NEXT: xxswapd vs0, vs4
+; FAST-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: li r4, 112
+; FAST-NEXT: xxswapd vs0, vs5
+; FAST-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT: lxvd2x vs0, 0, r3
+; FAST-NEXT: li r3, 96
+; FAST-NEXT: xxswapd vs0, vs0
+; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: addi r3, r1, 1072
+; FAST-NEXT: lxvd2x vs0, 0, r3
+; FAST-NEXT: li r3, 80
+; FAST-NEXT: xxswapd vs0, vs0
+; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: addi r3, r1, 1056
+; FAST-NEXT: lxvd2x vs0, 0, r3
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: xxswapd vs0, vs0
+; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: addi r3, r1, 1040
+; FAST-NEXT: lxvd2x vs0, 0, r3
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: xxswapd vs0, vs0
+; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: addi r3, r1, 1024
+; FAST-NEXT: lxvd2x vs0, 0, r3
+; FAST-NEXT: addi r3, r1, 1008
+; FAST-NEXT: xxswapd v22, vs0
+; FAST-NEXT: lxvd2x vs0, 0, r3
+; FAST-NEXT: addi r3, r1, 992
+; FAST-NEXT: xxswapd v21, vs0
+; FAST-NEXT: lxvd2x vs0, 0, r3
+; FAST-NEXT: addi r3, r1, 976
+; FAST-NEXT: xxswapd v20, vs0
+; FAST-NEXT: lxvd2x vs0, 0, r3
+; FAST-NEXT: addi r3, r1, 960
+; FAST-NEXT: xxswapd v31, vs0
+; FAST-NEXT: lxvd2x vs0, 0, r3
+; FAST-NEXT: addi r3, r1, 944
+; FAST-NEXT: xxswapd v30, vs0
+; FAST-NEXT: lxvd2x vs0, 0, r3
+; FAST-NEXT: addi r3, r1, 928
+; FAST-NEXT: xxswapd v29, vs0
+; FAST-NEXT: lxvd2x vs0, 0, r3
+; FAST-NEXT: addi r3, r1, 912
+; FAST-NEXT: xxswapd v28, vs0
+; FAST-NEXT: lxvd2x vs0, 0, r3
+; FAST-NEXT: addi r3, r1, 896
+; FAST-NEXT: xxswapd v27, vs0
+; FAST-NEXT: lxvd2x vs0, 0, r3
+; FAST-NEXT: addi r3, r1, 880
+; FAST-NEXT: xxswapd v26, vs0
+; FAST-NEXT: lxvd2x vs0, 0, r3
+; FAST-NEXT: xxswapd v25, vs0
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v24
+; FAST-NEXT: mtvsrd v23, r3
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 368
+; FAST-NEXT: vmr v2, v25
+; FAST-NEXT: xxmrghd vs0, vs0, v23
+; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v26
+; FAST-NEXT: mtvsrd v25, r3
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 320
+; FAST-NEXT: vmr v2, v27
+; FAST-NEXT: xxmrghd vs0, vs0, v25
+; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v28
+; FAST-NEXT: mtvsrd v27, r3
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 272
+; FAST-NEXT: vmr v2, v29
+; FAST-NEXT: xxmrghd vs0, vs0, v27
+; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v30
+; FAST-NEXT: mtvsrd v29, r3
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 208
+; FAST-NEXT: vmr v2, v31
+; FAST-NEXT: xxmrghd vs0, vs0, v29
+; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v20
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: vmr v2, v21
+; FAST-NEXT: xxmrghd v31, vs0, v31
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: vmr v2, v22
+; FAST-NEXT: mtvsrd v29, r3
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: xxmrghd v29, vs0, v29
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtvsrd v27, r3
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 80
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: xxmrghd v27, vs0, v27
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtvsrd v25, r3
+; FAST-NEXT: li r3, 96
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 112
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: xxmrghd v25, vs0, v25
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtvsrd v23, r3
+; FAST-NEXT: li r3, 128
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 144
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: xxmrghd v23, vs0, v23
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtvsrd v22, r3
+; FAST-NEXT: li r3, 160
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 176
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: xxmrghd v22, vs0, v22
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtvsrd v21, r3
+; FAST-NEXT: li r3, 192
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 224
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: xxmrghd v21, vs0, v21
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtvsrd v20, r3
+; FAST-NEXT: li r3, 240
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 256
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: xxmrghd v20, vs0, v20
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtvsrd v24, r3
+; FAST-NEXT: li r3, 288
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 304
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: xxmrghd v24, vs0, v24
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtvsrd v26, r3
+; FAST-NEXT: li r3, 336
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 352
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: xxmrghd v26, vs0, v26
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtvsrd v28, r3
+; FAST-NEXT: li r3, 384
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 400
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: xxmrghd v28, vs0, v28
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtvsrd v30, r3
+; FAST-NEXT: li r3, 416
+; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: bl lrintf128
+; FAST-NEXT: nop
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 80
+; FAST-NEXT: xxswapd vs1, v28
+; FAST-NEXT: li r4, 208
+; FAST-NEXT: xxswapd vs2, v26
+; FAST-NEXT: xxswapd vs3, v27
+; FAST-NEXT: xxmrghd v2, vs0, v30
+; FAST-NEXT: xxswapd vs0, v2
+; FAST-NEXT: stxvd2x vs0, r30, r3
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: stxvd2x vs1, r30, r3
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: stxvd2x vs2, r30, r3
+; FAST-NEXT: li r3, 32
+; FAST-NEXT: xxswapd vs0, v24
+; FAST-NEXT: stxvd2x vs0, r30, r3
+; FAST-NEXT: li r3, 16
+; FAST-NEXT: xxswapd vs1, v20
+; FAST-NEXT: stxvd2x vs1, r30, r3
+; FAST-NEXT: li r3, 240
+; FAST-NEXT: xxswapd vs2, v23
+; FAST-NEXT: xxswapd vs0, v21
+; FAST-NEXT: stxvd2x vs0, r30, r3
+; FAST-NEXT: li r3, 224
+; FAST-NEXT: xxswapd vs1, v22
+; FAST-NEXT: stxvd2x vs1, r30, r3
+; FAST-NEXT: li r3, 208
+; FAST-NEXT: stxvd2x vs2, r30, r3
+; FAST-NEXT: li r3, 192
+; FAST-NEXT: xxswapd vs0, v25
+; FAST-NEXT: stxvd2x vs0, r30, r3
+; FAST-NEXT: li r3, 176
+; FAST-NEXT: stxvd2x vs3, r30, r3
+; FAST-NEXT: li r3, 160
+; FAST-NEXT: lxvd2x vs2, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT: li r4, 272
+; FAST-NEXT: xxswapd vs1, v29
+; FAST-NEXT: stxvd2x vs1, r30, r3
+; FAST-NEXT: li r3, 144
+; FAST-NEXT: lxvd2x vs1, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT: li r4, 320
+; FAST-NEXT: lxvd2x vs3, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT: li r4, 368
+; FAST-NEXT: lxvd2x vs4, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT: xxswapd vs0, v31
+; FAST-NEXT: stxvd2x vs0, r30, r3
+; FAST-NEXT: li r3, 128
+; FAST-NEXT: xxswapd vs2, vs2
+; FAST-NEXT: stxvd2x vs2, r30, r3
+; FAST-NEXT: li r3, 112
+; FAST-NEXT: xxswapd vs1, vs1
+; FAST-NEXT: stxvd2x vs1, r30, r3
+; FAST-NEXT: li r3, 96
+; FAST-NEXT: xxswapd vs3, vs3
+; FAST-NEXT: stxvd2x vs3, r30, r3
+; FAST-NEXT: li r3, 608
+; FAST-NEXT: xxswapd vs4, vs4
+; FAST-NEXT: stxvd2x vs4, 0, r30
+; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 592
+; FAST-NEXT: ld r30, 624(r1) # 8-byte Folded Reload
+; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 576
+; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 560
+; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 544
+; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 528
+; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 512
+; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 496
+; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 480
+; FAST-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 464
+; FAST-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 448
+; FAST-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 432
+; FAST-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: addi r1, r1, 640
+; FAST-NEXT: ld r0, 16(r1)
+; FAST-NEXT: mtlr r0
+; FAST-NEXT: blr
+ %a = call <32 x i64> @llvm.lrint.v32i64.v32f128(<32 x fp128> %x)
+ ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.lrint.v32i64.v32f128(<32 x fp128>)
diff --git a/llvm/test/CodeGen/RISCV/lrint-conv.ll b/llvm/test/CodeGen/RISCV/lrint-conv.ll
new file mode 100644
index 0000000000000..d3af2153588a1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/lrint-conv.ll
@@ -0,0 +1,76 @@
+; Tests for lrint and llrint, with both i32 and i64 checked.
+
+; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32
+; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32
+; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64
+; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64
+
+; FIXME: crash
+; define ITy @test_lrint_ixx_f16(half %x) nounwind {
+; %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
+; }
+
+; define ITy @test_llrint_ixx_f16(half %x) nounwind {
+; %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
+; }
+
+define ITy @test_lrint_ixx_f32(float %x) nounwind {
+; RV32-LABEL: test_lrint_ixx_f32:
+; RV32: call lrintf
+;
+; RV64-LABEL: test_lrint_ixx_f32:
+; RV64: call lrintf
+ %res = tail call ITy @llvm.lrint.ITy.f32(float %x)
+ ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f32(float %x) nounwind {
+; RV32-LABEL: test_llrint_ixx_f32:
+; RV32: call llrintf
+;
+; RV64-LABEL: test_llrint_ixx_f32:
+; RV64: call llrintf
+ %res = tail call ITy @llvm.llrint.ITy.f32(float %x)
+ ret ITy %res
+}
+
+define ITy @test_lrint_ixx_f64(double %x) nounwind {
+; RV32-LABEL: test_lrint_ixx_f64:
+; RV32: call lrint
+;
+; RV64-LABEL: test_lrint_ixx_f64:
+; RV64: call lrint
+ %res = tail call ITy @llvm.lrint.ITy.f64(double %x)
+ ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f64(double %x) nounwind {
+; RV32-LABEL: test_llrint_ixx_f64:
+; RV32: call llrint
+;
+; RV64-LABEL: test_llrint_ixx_f64:
+; RV64: call llrint
+ %res = tail call ITy @llvm.llrint.ITy.f64(double %x)
+ ret ITy %res
+}
+
+; FIXME(#44744): incorrect libcall on riscv32
+define ITy @test_lrint_ixx_f128(fp128 %x) nounwind {
+; RV32-LABEL: test_lrint_ixx_f128:
+; RV32: call lrintl
+;
+; RV64-LABEL: test_lrint_ixx_f128:
+; RV64: call lrintl
+ %res = tail call ITy @llvm.lrint.ITy.f128(fp128 %x)
+ ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f128(fp128 %x) nounwind {
+; RV32-LABEL: test_llrint_ixx_f128:
+; RV32: call llrintl
+;
+; RV64-LABEL: test_llrint_ixx_f128:
+; RV64: call llrintl
+ %res = tail call ITy @llvm.llrint.ITy.f128(fp128 %x)
+ ret ITy %res
+}
diff --git a/llvm/test/CodeGen/SPARC/lrint-conv.ll b/llvm/test/CodeGen/SPARC/lrint-conv.ll
new file mode 100644
index 0000000000000..81934114f548f
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/lrint-conv.ll
@@ -0,0 +1,68 @@
+; Tests for lrint and llrint, with both i32 and i64 checked.
+
+; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=sparc | FileCheck %s --check-prefixes=SPARC32
+; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=sparc | FileCheck %s --check-prefixes=SPARC32
+; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=sparc64 | FileCheck %s --check-prefixes=SPARC64
+; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=sparc64 | FileCheck %s --check-prefixes=SPARC64
+
+; FIXME: crash "Input type needs to be promoted!"
+; define ITy @test_lrint_ixx_f16(half %x) nounwind {
+; %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
+; ret ITy %res
+; }
+
+; define ITy @test_llrint_ixx_f16(half %x) nounwind {
+; %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
+; ret ITy %res
+; }
+
+define ITy @test_lrint_ixx_f32(float %x) nounwind {
+; SPARC32-LABEL: test_lrint_ixx_f32:
+; SPARC32: call lrintf
+;
+; SPARC64-LABEL: test_lrint_ixx_f32:
+; SPARC64: call lrintf
+ %res = tail call ITy @llvm.lrint.ITy.f32(float %x)
+ ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f32(float %x) nounwind {
+; SPARC32-LABEL: test_llrint_ixx_f32:
+; SPARC32: call llrintf
+;
+; SPARC64-LABEL: test_llrint_ixx_f32:
+; SPARC64: call llrintf
+ %res = tail call ITy @llvm.llrint.ITy.f32(float %x)
+ ret ITy %res
+}
+
+define ITy @test_lrint_ixx_f64(double %x) nounwind {
+; SPARC32-LABEL: test_lrint_ixx_f64:
+; SPARC32: call lrint
+;
+; SPARC64-LABEL: test_lrint_ixx_f64:
+; SPARC64: call lrint
+ %res = tail call ITy @llvm.lrint.ITy.f64(double %x)
+ ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f64(double %x) nounwind {
+; SPARC32-LABEL: test_llrint_ixx_f64:
+; SPARC32: call llrint
+;
+; SPARC64-LABEL: test_llrint_ixx_f64:
+; SPARC64: call llrint
+ %res = tail call ITy @llvm.llrint.ITy.f64(double %x)
+ ret ITy %res
+}
+
+; FIXME(#41838): unsupported type
+; define ITy @test_lrint_ixx_f128(fp128 %x) nounwind {
+; %res = tail call ITy @llvm.lrint.ITy.f128(fp128 %x)
+; ret ITy %res
+; }
+
+; define ITy @test_llrint_ixx_f128(fp128 %x) nounwind {
+; %res = tail call ITy @llvm.llrint.ITy.f128(fp128 %x)
+; ret ITy %res
+; }
diff --git a/llvm/test/CodeGen/WebAssembly/lrint-conv.ll b/llvm/test/CodeGen/WebAssembly/lrint-conv.ll
new file mode 100644
index 0000000000000..0571150cb3505
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/lrint-conv.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; Tests for lrint and llrint, with both i32 and i64 checked.
+
+; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=wasm32-unknown-unknown | FileCheck %s
+; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=wasm32-unknown-unknown | FileCheck %s
+
+define ITy @test_lrint_ixx_f16(half %x) nounwind {
+; CHECK-LABEL: test_lrint_ixx_f16:
+; CHECK: call lrintf
+ %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
+ ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f16(half %x) nounwind {
+; CHECK-LABEL: test_llrint_ixx_f16:
+; CHECK: call llrintf
+ %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
+ ret ITy %res
+}
+
+define ITy @test_lrint_ixx_f32(float %x) nounwind {
+; CHECK-LABEL: test_lrint_ixx_f32:
+; CHECK: call lrintf
+ %res = tail call ITy @llvm.lrint.ITy.f32(float %x)
+ ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f32(float %x) nounwind {
+; CHECK-LABEL: test_llrint_ixx_f32:
+; CHECK: call llrintf
+ %res = tail call ITy @llvm.llrint.ITy.f32(float %x)
+ ret ITy %res
+}
+
+define ITy @test_lrint_ixx_f64(double %x) nounwind {
+; CHECK-LABEL: test_lrint_ixx_f64:
+; CHECK: call lrint
+ %res = tail call ITy @llvm.lrint.ITy.f64(double %x)
+ ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f64(double %x) nounwind {
+; CHECK-LABEL: test_llrint_ixx_f64:
+; CHECK: call llrint
+ %res = tail call ITy @llvm.llrint.ITy.f64(double %x)
+ ret ITy %res
+}
+
+define ITy @test_lrint_ixx_f128(fp128 %x) nounwind {
+; CHECK-LABEL: test_lrint_ixx_f128:
+; CHECK: call lrintl
+ %res = tail call ITy @llvm.lrint.ITy.f128(fp128 %x)
+ ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f128(fp128 %x) nounwind {
+; CHECK-LABEL: test_llrint_ixx_f128:
+; CHECK: call llrintl
+ %res = tail call ITy @llvm.llrint.ITy.f128(fp128 %x)
+ ret ITy %res
+}
diff --git a/llvm/test/CodeGen/X86/llrint-conv.ll b/llvm/test/CodeGen/X86/llrint-conv.ll
index 402daf80a15e8..d3eca5197a94b 100644
--- a/llvm/test/CodeGen/X86/llrint-conv.ll
+++ b/llvm/test/CodeGen/X86/llrint-conv.ll
@@ -7,14 +7,50 @@
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX
-define i64 @testmsxs(float %x) {
+define i64 @testmsxh(half %x) nounwind {
+; X86-NOSSE-LABEL: testmsxh:
+; X86-NOSSE: # %bb.0: # %entry
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl %eax, (%esp)
+; X86-NOSSE-NEXT: calll __extendhfsf2
+; X86-NOSSE-NEXT: fstps (%esp)
+; X86-NOSSE-NEXT: calll llrintf
+; X86-NOSSE-NEXT: popl %ecx
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: testmsxh:
+; X86-SSE2: # %bb.0: # %entry
+; X86-SSE2-NEXT: pushl %eax
+; X86-SSE2-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
+; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
+; X86-SSE2-NEXT: movw %ax, (%esp)
+; X86-SSE2-NEXT: calll __extendhfsf2
+; X86-SSE2-NEXT: fstps (%esp)
+; X86-SSE2-NEXT: calll llrintf
+; X86-SSE2-NEXT: popl %ecx
+; X86-SSE2-NEXT: retl
+;
+; X64-SSE-LABEL: testmsxh:
+; X64-SSE: # %bb.0: # %entry
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: callq __extendhfsf2 at PLT
+; X64-SSE-NEXT: callq rintf at PLT
+; X64-SSE-NEXT: callq __truncsfhf2 at PLT
+; X64-SSE-NEXT: callq __extendhfsf2 at PLT
+; X64-SSE-NEXT: cvttss2si %xmm0, %rax
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+entry:
+ %0 = tail call i64 @llvm.llrint.f16(half %x)
+ ret i64 %0
+}
+
+define i64 @testmsxs(float %x) nounwind {
; X86-NOSSE-LABEL: testmsxs:
; X86-NOSSE: # %bb.0: # %entry
; X86-NOSSE-NEXT: pushl %ebp
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT: .cfi_offset %ebp, -8
; X86-NOSSE-NEXT: movl %esp, %ebp
-; X86-NOSSE-NEXT: .cfi_def_cfa_register %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $8, %esp
; X86-NOSSE-NEXT: flds 8(%ebp)
@@ -23,16 +59,12 @@ define i64 @testmsxs(float %x) {
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
-; X86-NOSSE-NEXT: .cfi_def_cfa %esp, 4
; X86-NOSSE-NEXT: retl
;
; X86-SSE2-LABEL: testmsxs:
; X86-SSE2: # %bb.0: # %entry
; X86-SSE2-NEXT: pushl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT: .cfi_offset %ebp, -8
; X86-SSE2-NEXT: movl %esp, %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -43,16 +75,12 @@ define i64 @testmsxs(float %x) {
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: testmsxs:
; X86-AVX: # %bb.0: # %entry
; X86-AVX-NEXT: pushl %ebp
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %ebp, -8
; X86-AVX-NEXT: movl %esp, %ebp
-; X86-AVX-NEXT: .cfi_def_cfa_register %ebp
; X86-AVX-NEXT: andl $-8, %esp
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -63,7 +91,6 @@ define i64 @testmsxs(float %x) {
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
-; X86-AVX-NEXT: .cfi_def_cfa %esp, 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: testmsxs:
@@ -80,14 +107,11 @@ entry:
ret i64 %0
}
-define i64 @testmsxd(double %x) {
+define i64 @testmsxd(double %x) nounwind {
; X86-NOSSE-LABEL: testmsxd:
; X86-NOSSE: # %bb.0: # %entry
; X86-NOSSE-NEXT: pushl %ebp
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT: .cfi_offset %ebp, -8
; X86-NOSSE-NEXT: movl %esp, %ebp
-; X86-NOSSE-NEXT: .cfi_def_cfa_register %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $8, %esp
; X86-NOSSE-NEXT: fldl 8(%ebp)
@@ -96,16 +120,12 @@ define i64 @testmsxd(double %x) {
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
-; X86-NOSSE-NEXT: .cfi_def_cfa %esp, 4
; X86-NOSSE-NEXT: retl
;
; X86-SSE2-LABEL: testmsxd:
; X86-SSE2: # %bb.0: # %entry
; X86-SSE2-NEXT: pushl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT: .cfi_offset %ebp, -8
; X86-SSE2-NEXT: movl %esp, %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
@@ -116,16 +136,12 @@ define i64 @testmsxd(double %x) {
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: testmsxd:
; X86-AVX: # %bb.0: # %entry
; X86-AVX-NEXT: pushl %ebp
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %ebp, -8
; X86-AVX-NEXT: movl %esp, %ebp
-; X86-AVX-NEXT: .cfi_def_cfa_register %ebp
; X86-AVX-NEXT: andl $-8, %esp
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
@@ -136,7 +152,6 @@ define i64 @testmsxd(double %x) {
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
-; X86-AVX-NEXT: .cfi_def_cfa %esp, 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: testmsxd:
@@ -153,14 +168,11 @@ entry:
ret i64 %0
}
-define i64 @testmsll(x86_fp80 %x) {
+define i64 @testmsll(x86_fp80 %x) nounwind {
; X86-LABEL: testmsll:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: fldt 8(%ebp)
@@ -169,7 +181,6 @@ define i64 @testmsll(x86_fp80 %x) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
-; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl
;
; X64-LABEL: testmsll:
@@ -183,6 +194,61 @@ entry:
ret i64 %0
}
+; FIXME(#44744): incorrect libcall
+define i64 @testmslq(fp128 %x) nounwind {
+; X86-NOSSE-LABEL: testmslq:
+; X86-NOSSE: # %bb.0: # %entry
+; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: movl %esp, %ebp
+; X86-NOSSE-NEXT: andl $-16, %esp
+; X86-NOSSE-NEXT: subl $16, %esp
+; X86-NOSSE-NEXT: pushl 20(%ebp)
+; X86-NOSSE-NEXT: pushl 16(%ebp)
+; X86-NOSSE-NEXT: pushl 12(%ebp)
+; X86-NOSSE-NEXT: pushl 8(%ebp)
+; X86-NOSSE-NEXT: calll llrintl
+; X86-NOSSE-NEXT: addl $16, %esp
+; X86-NOSSE-NEXT: movl %ebp, %esp
+; X86-NOSSE-NEXT: popl %ebp
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: testmslq:
+; X86-SSE2: # %bb.0: # %entry
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: pushl 20(%ebp)
+; X86-SSE2-NEXT: pushl 16(%ebp)
+; X86-SSE2-NEXT: pushl 12(%ebp)
+; X86-SSE2-NEXT: pushl 8(%ebp)
+; X86-SSE2-NEXT: calll llrintl
+; X86-SSE2-NEXT: addl $16, %esp
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX-LABEL: testmslq:
+; X86-AVX: # %bb.0: # %entry
+; X86-AVX-NEXT: pushl %ebp
+; X86-AVX-NEXT: movl %esp, %ebp
+; X86-AVX-NEXT: andl $-16, %esp
+; X86-AVX-NEXT: subl $32, %esp
+; X86-AVX-NEXT: vmovups 8(%ebp), %xmm0
+; X86-AVX-NEXT: vmovups %xmm0, (%esp)
+; X86-AVX-NEXT: calll llrintl
+; X86-AVX-NEXT: movl %ebp, %esp
+; X86-AVX-NEXT: popl %ebp
+; X86-AVX-NEXT: retl
+;
+; X64-LABEL: testmslq:
+; X64: # %bb.0: # %entry
+; X64-NEXT: jmp llrintl at PLT # TAILCALL
+entry:
+ %0 = tail call i64 @llvm.llrint.fp128(fp128 %x)
+ ret i64 %0
+}
+
declare i64 @llvm.llrint.f32(float) nounwind readnone
declare i64 @llvm.llrint.f64(double) nounwind readnone
declare i64 @llvm.llrint.f80(x86_fp80) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/lrint-conv-i32.ll b/llvm/test/CodeGen/X86/lrint-conv-i32.ll
index 21580f53ec9b3..3c50aea1095f4 100644
--- a/llvm/test/CodeGen/X86/lrint-conv-i32.ll
+++ b/llvm/test/CodeGen/X86/lrint-conv-i32.ll
@@ -7,16 +7,21 @@
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX
-define i32 @testmsws(float %x) {
+; FIXME: crash
+; define i32 @testmswh(half %x) nounwind {
+; entry:
+; %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+; ret i32 %0
+; }
+
+define i32 @testmsws(float %x) nounwind {
; X86-NOSSE-LABEL: testmsws:
; X86-NOSSE: # %bb.0: # %entry
; X86-NOSSE-NEXT: pushl %eax
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fistpl (%esp)
; X86-NOSSE-NEXT: movl (%esp), %eax
; X86-NOSSE-NEXT: popl %ecx
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
; X86-SSE2-LABEL: testmsws:
@@ -43,16 +48,14 @@ entry:
ret i32 %0
}
-define i32 @testmswd(double %x) {
+define i32 @testmswd(double %x) nounwind {
; X86-NOSSE-LABEL: testmswd:
; X86-NOSSE: # %bb.0: # %entry
; X86-NOSSE-NEXT: pushl %eax
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fistpl (%esp)
; X86-NOSSE-NEXT: movl (%esp), %eax
; X86-NOSSE-NEXT: popl %ecx
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
; X86-SSE2-LABEL: testmswd:
@@ -79,16 +82,14 @@ entry:
ret i32 %0
}
-define i32 @testmsll(x86_fp80 %x) {
+define i32 @testmsll(x86_fp80 %x) nounwind {
; X86-LABEL: testmsll:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %eax
-; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fistpl (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: popl %ecx
-; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: testmsll:
@@ -102,6 +103,61 @@ entry:
ret i32 %0
}
+; FIXME(#44744): incorrect libcall
+define i32 @testmswq(fp128 %x) nounwind {
+; X86-NOSSE-LABEL: testmswq:
+; X86-NOSSE: # %bb.0: # %entry
+; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: movl %esp, %ebp
+; X86-NOSSE-NEXT: andl $-16, %esp
+; X86-NOSSE-NEXT: subl $16, %esp
+; X86-NOSSE-NEXT: pushl 20(%ebp)
+; X86-NOSSE-NEXT: pushl 16(%ebp)
+; X86-NOSSE-NEXT: pushl 12(%ebp)
+; X86-NOSSE-NEXT: pushl 8(%ebp)
+; X86-NOSSE-NEXT: calll lrintl
+; X86-NOSSE-NEXT: addl $16, %esp
+; X86-NOSSE-NEXT: movl %ebp, %esp
+; X86-NOSSE-NEXT: popl %ebp
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: testmswq:
+; X86-SSE2: # %bb.0: # %entry
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: pushl 20(%ebp)
+; X86-SSE2-NEXT: pushl 16(%ebp)
+; X86-SSE2-NEXT: pushl 12(%ebp)
+; X86-SSE2-NEXT: pushl 8(%ebp)
+; X86-SSE2-NEXT: calll lrintl
+; X86-SSE2-NEXT: addl $16, %esp
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX-LABEL: testmswq:
+; X86-AVX: # %bb.0: # %entry
+; X86-AVX-NEXT: pushl %ebp
+; X86-AVX-NEXT: movl %esp, %ebp
+; X86-AVX-NEXT: andl $-16, %esp
+; X86-AVX-NEXT: subl $32, %esp
+; X86-AVX-NEXT: vmovups 8(%ebp), %xmm0
+; X86-AVX-NEXT: vmovups %xmm0, (%esp)
+; X86-AVX-NEXT: calll lrintl
+; X86-AVX-NEXT: movl %ebp, %esp
+; X86-AVX-NEXT: popl %ebp
+; X86-AVX-NEXT: retl
+;
+; X64-LABEL: testmswq:
+; X64: # %bb.0: # %entry
+; X64-NEXT: jmp lrintl at PLT # TAILCALL
+entry:
+ %0 = tail call i32 @llvm.lrint.i32.f128(fp128 %x)
+ ret i32 %0
+}
+
declare i32 @llvm.lrint.i32.f32(float) nounwind readnone
declare i32 @llvm.lrint.i32.f64(double) nounwind readnone
declare i32 @llvm.lrint.i32.f80(x86_fp80) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/lrint-conv-i64.ll b/llvm/test/CodeGen/X86/lrint-conv-i64.ll
index 38fa09085e189..2ba1500df0b6e 100644
--- a/llvm/test/CodeGen/X86/lrint-conv-i64.ll
+++ b/llvm/test/CodeGen/X86/lrint-conv-i64.ll
@@ -3,7 +3,23 @@
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX
-define i64 @testmsxs(float %x) {
+define i64 @testmsxh(half %x) nounwind {
+; SSE-LABEL: testmsxh:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: callq __extendhfsf2 at PLT
+; SSE-NEXT: callq rintf at PLT
+; SSE-NEXT: callq __truncsfhf2 at PLT
+; SSE-NEXT: callq __extendhfsf2 at PLT
+; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: popq %rcx
+; SSE-NEXT: retq
+entry:
+ %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
+ ret i64 %0
+}
+
+define i64 @testmsxs(float %x) nounwind {
; SSE-LABEL: testmsxs:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvtss2si %xmm0, %rax
@@ -18,7 +34,7 @@ entry:
ret i64 %0
}
-define i64 @testmsxd(double %x) {
+define i64 @testmsxd(double %x) nounwind {
; SSE-LABEL: testmsxd:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvtsd2si %xmm0, %rax
@@ -33,7 +49,7 @@ entry:
ret i64 %0
}
-define i64 @testmsll(x86_fp80 %x) {
+define i64 @testmsll(x86_fp80 %x) nounwind {
; CHECK-LABEL: testmsll:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
@@ -45,7 +61,17 @@ entry:
ret i64 %0
}
-define i32 @PR125324(float %x) {
+; FIXME(#44744): incorrect libcall
+define i64 @testmsxq(fp128 %x) nounwind {
+; CHECK-LABEL: testmsxq:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: jmp lrintl at PLT # TAILCALL
+entry:
+ %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x)
+ ret i64 %0
+}
+
+define i32 @PR125324(float %x) nounwind {
; SSE-LABEL: PR125324:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvtss2si %xmm0, %rax
diff --git a/llvm/test/CodeGen/X86/vector-llrint-f16.ll b/llvm/test/CodeGen/X86/vector-llrint-f16.ll
index 5e5c5849fc22e..eb7be61b719f2 100644
--- a/llvm/test/CodeGen/X86/vector-llrint-f16.ll
+++ b/llvm/test/CodeGen/X86/vector-llrint-f16.ll
@@ -1,10 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; FIXME: crash "Do not know how to split the result of this operator!"
+; SKIP: sed 's/XRINT/lrint/g' %s | llc -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
+; SKIP: sed 's/XRINT/llrint/g' %s | llc -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
; RUN: sed 's/XRINT/lrint/g' %s | llc -mtriple=x86_64-unknown -mattr=avx2,f16c | FileCheck %s --check-prefix=AVX
; RUN: sed 's/XRINT/llrint/g' %s | llc -mtriple=x86_64-unknown -mattr=avx2,f16c | FileCheck %s --check-prefix=AVX
; RUN: sed 's/XRINT/lrint/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefix=FP16
; RUN: sed 's/XRINT/llrint/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefix=FP16
-define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
+define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind {
; AVX-LABEL: llrint_v1i64_v1f16:
; AVX: # %bb.0:
; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
@@ -22,7 +25,7 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
ret <1 x i64> %a
}
-define <2 x i64> @llrint_v2i64_v2f16(<2 x half> %x) {
+define <2 x i64> @llrint_v2i64_v2f16(<2 x half> %x) nounwind {
; AVX-LABEL: llrint_v2i64_v2f16:
; AVX: # %bb.0:
; AVX-NEXT: vcvtph2ps %xmm0, %xmm1
@@ -49,7 +52,7 @@ define <2 x i64> @llrint_v2i64_v2f16(<2 x half> %x) {
ret <2 x i64> %a
}
-define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
+define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind {
; AVX-LABEL: llrint_v4i64_v4f16:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlq $48, %xmm0, %xmm1
@@ -92,7 +95,7 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
ret <4 x i64> %a
}
-define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
+define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind {
; AVX-LABEL: llrint_v8i64_v8f16:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlq $48, %xmm0, %xmm1
@@ -167,7 +170,7 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
ret <8 x i64> %a
}
-define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
+define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind {
; AVX-LABEL: llrint_v16i64_v16f16:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa %ymm0, %ymm2
@@ -307,7 +310,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
ret <16 x i64> %a
}
-define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
+define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
; AVX-LABEL: llrint_v32i64_v32f16:
; AVX: # %bb.0:
; AVX-NEXT: movq %rdi, %rax
diff --git a/llvm/test/CodeGen/X86/vector-llrint.ll b/llvm/test/CodeGen/X86/vector-llrint.ll
index 7017eb60df41d..6fd1a35505aac 100644
--- a/llvm/test/CodeGen/X86/vector-llrint.ll
+++ b/llvm/test/CodeGen/X86/vector-llrint.ll
@@ -1,10 +1,25 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=AVX512DQ
-define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
+define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) nounwind {
+; X86-LABEL: llrint_v1i64_v1f32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: flds 8(%ebp)
+; X86-NEXT: fistpll (%esp)
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
; SSE-LABEL: llrint_v1i64_v1f32:
; SSE: # %bb.0:
; SSE-NEXT: cvtss2si %xmm0, %rax
@@ -24,7 +39,34 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
}
declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
-define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
+define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) nounwind {
+; X86-LABEL: llrint_v2i64_v2f32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: flds 16(%ebp)
+; X86-NEXT: flds 12(%ebp)
+; X86-NEXT: fistpll (%esp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: movl (%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -8(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+;
; SSE-LABEL: llrint_v2i64_v2f32:
; SSE: # %bb.0:
; SSE-NEXT: cvtss2si %xmm0, %rax
@@ -55,7 +97,54 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
}
declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
-define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
+define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) nounwind {
+; X86-LABEL: llrint_v4i64_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: flds 24(%ebp)
+; X86-NEXT: flds 20(%ebp)
+; X86-NEXT: flds 16(%ebp)
+; X86-NEXT: flds 12(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, 28(%eax)
+; X86-NEXT: movl %ecx, 24(%eax)
+; X86-NEXT: movl %edx, 20(%eax)
+; X86-NEXT: movl %ebx, 16(%eax)
+; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 4(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+;
; SSE-LABEL: llrint_v4i64_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: cvtss2si %xmm0, %rax
@@ -121,7 +210,94 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
}
declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
-define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
+define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) nounwind {
+; X86-LABEL: llrint_v8i64_v8f32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $120, %esp
+; X86-NEXT: flds 12(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 16(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 20(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 24(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 28(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 32(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 36(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 40(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, 60(%eax)
+; X86-NEXT: movl %ecx, 56(%eax)
+; X86-NEXT: movl %edx, 52(%eax)
+; X86-NEXT: movl %esi, 48(%eax)
+; X86-NEXT: movl %edi, 44(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 40(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 36(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 32(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 28(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 24(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 20(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 16(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 4(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+;
; SSE-LABEL: llrint_v8i64_v8f32:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm2
@@ -235,7 +411,174 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
}
declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
-define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
+define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) nounwind {
+; X86-LABEL: llrint_v16i64_v16f32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $248, %esp
+; X86-NEXT: flds 12(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 16(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 20(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 24(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 28(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 32(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 36(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 40(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 44(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 48(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 52(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 56(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 60(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 64(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 68(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: flds 72(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, 124(%eax)
+; X86-NEXT: movl %ecx, 120(%eax)
+; X86-NEXT: movl %edx, 116(%eax)
+; X86-NEXT: movl %esi, 112(%eax)
+; X86-NEXT: movl %edi, 108(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 104(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 100(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 96(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 92(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 88(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 84(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 80(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 76(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 72(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 68(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 64(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 60(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 56(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 52(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 48(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 44(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 40(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 36(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 32(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 28(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 24(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 20(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 16(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 4(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+;
; SSE-LABEL: llrint_v16i64_v16f32:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
@@ -451,7 +794,21 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
}
declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
-define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
+define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) nounwind {
+; X86-LABEL: llrint_v1i64_v1f64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: fldl 8(%ebp)
+; X86-NEXT: fistpll (%esp)
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
; SSE-LABEL: llrint_v1i64_v1f64:
; SSE: # %bb.0:
; SSE-NEXT: cvtsd2si %xmm0, %rax
@@ -471,7 +828,34 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
}
declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>)
-define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
+define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) nounwind {
+; X86-LABEL: llrint_v2i64_v2f64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: fldl 20(%ebp)
+; X86-NEXT: fldl 12(%ebp)
+; X86-NEXT: fistpll (%esp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: movl (%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -8(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+;
; SSE-LABEL: llrint_v2i64_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: cvtsd2si %xmm0, %rax
@@ -502,7 +886,54 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
}
declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)
-define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
+define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) nounwind {
+; X86-LABEL: llrint_v4i64_v4f64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: fldl 36(%ebp)
+; X86-NEXT: fldl 28(%ebp)
+; X86-NEXT: fldl 20(%ebp)
+; X86-NEXT: fldl 12(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, 28(%eax)
+; X86-NEXT: movl %ecx, 24(%eax)
+; X86-NEXT: movl %edx, 20(%eax)
+; X86-NEXT: movl %ebx, 16(%eax)
+; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 4(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+;
; SSE-LABEL: llrint_v4i64_v4f64:
; SSE: # %bb.0:
; SSE-NEXT: cvtsd2si %xmm0, %rax
@@ -566,7 +997,94 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
}
declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
-define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
+define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) nounwind {
+; X86-LABEL: llrint_v8i64_v8f64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $120, %esp
+; X86-NEXT: fldl 12(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: fldl 20(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: fldl 28(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: fldl 36(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: fldl 44(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: fldl 52(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: fldl 60(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: fldl 68(%ebp)
+; X86-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, 60(%eax)
+; X86-NEXT: movl %ecx, 56(%eax)
+; X86-NEXT: movl %edx, 52(%eax)
+; X86-NEXT: movl %esi, 48(%eax)
+; X86-NEXT: movl %edi, 44(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 40(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 36(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 32(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 28(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 24(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 20(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 16(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 4(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+;
; SSE-LABEL: llrint_v8i64_v8f64:
; SSE: # %bb.0:
; SSE-NEXT: cvtsd2si %xmm0, %rax
@@ -673,3 +1191,655 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
ret <8 x i64> %a
}
declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
+
+define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) nounwind {
+; X86-LABEL: llrint_v1i64_v1f128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: pushl 20(%ebp)
+; X86-NEXT: pushl 16(%ebp)
+; X86-NEXT: pushl 12(%ebp)
+; X86-NEXT: pushl 8(%ebp)
+; X86-NEXT: calll llrintl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: llrint_v1i64_v1f128:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: callq llrintl at PLT
+; SSE-NEXT: popq %rcx
+; SSE-NEXT: retq
+;
+; AVX-LABEL: llrint_v1i64_v1f128:
+; AVX: # %bb.0:
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: callq llrintl at PLT
+; AVX-NEXT: popq %rcx
+; AVX-NEXT: retq
+;
+; AVX512DQ-LABEL: llrint_v1i64_v1f128:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: pushq %rax
+; AVX512DQ-NEXT: callq llrintl at PLT
+; AVX512DQ-NEXT: popq %rcx
+; AVX512DQ-NEXT: retq
+ %a = call <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128> %x)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128>)
+
+define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) nounwind {
+; X86-LABEL: llrint_v2i64_v2f128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 8(%ebp), %esi
+; X86-NEXT: pushl 24(%ebp)
+; X86-NEXT: pushl 20(%ebp)
+; X86-NEXT: pushl 16(%ebp)
+; X86-NEXT: pushl 12(%ebp)
+; X86-NEXT: calll llrintl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: pushl 40(%ebp)
+; X86-NEXT: pushl 36(%ebp)
+; X86-NEXT: pushl 32(%ebp)
+; X86-NEXT: pushl 28(%ebp)
+; X86-NEXT: calll llrintl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %edx, 12(%esi)
+; X86-NEXT: movl %eax, 8(%esi)
+; X86-NEXT: movl %ebx, 4(%esi)
+; X86-NEXT: movl %edi, (%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+;
+; SSE-LABEL: llrint_v2i64_v2f128:
+; SSE: # %bb.0:
+; SSE-NEXT: subq $40, %rsp
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: callq llrintl at PLT
+; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq llrintl at PLT
+; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload
+; SSE-NEXT: # xmm0 = xmm0[0],mem[0]
+; SSE-NEXT: addq $40, %rsp
+; SSE-NEXT: retq
+;
+; AVX-LABEL: llrint_v2i64_v2f128:
+; AVX: # %bb.0:
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovaps %xmm1, %xmm0
+; AVX-NEXT: callq llrintl at PLT
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX-NEXT: callq llrintl at PLT
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: retq
+;
+; AVX512DQ-LABEL: llrint_v2i64_v2f128:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: subq $40, %rsp
+; AVX512DQ-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps %xmm1, %xmm0
+; AVX512DQ-NEXT: callq llrintl at PLT
+; AVX512DQ-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT: callq llrintl at PLT
+; AVX512DQ-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-NEXT: addq $40, %rsp
+; AVX512DQ-NEXT: retq
+ %a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>)
+
+define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) nounwind {
+; X86-LABEL: llrint_v4i64_v4f128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: movl 8(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %edi
+; X86-NEXT: movl 40(%ebp), %ebx
+; X86-NEXT: pushl 24(%ebp)
+; X86-NEXT: pushl 20(%ebp)
+; X86-NEXT: pushl 16(%ebp)
+; X86-NEXT: pushl 12(%ebp)
+; X86-NEXT: calll llrintl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl 32(%ebp)
+; X86-NEXT: pushl 28(%ebp)
+; X86-NEXT: calll llrintl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: pushl 56(%ebp)
+; X86-NEXT: pushl 52(%ebp)
+; X86-NEXT: pushl 48(%ebp)
+; X86-NEXT: pushl 44(%ebp)
+; X86-NEXT: calll llrintl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: pushl 72(%ebp)
+; X86-NEXT: pushl 68(%ebp)
+; X86-NEXT: pushl 64(%ebp)
+; X86-NEXT: pushl 60(%ebp)
+; X86-NEXT: calll llrintl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %edx, 28(%esi)
+; X86-NEXT: movl %eax, 24(%esi)
+; X86-NEXT: movl %ebx, 20(%esi)
+; X86-NEXT: movl %edi, 16(%esi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 12(%esi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 8(%esi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 4(%esi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+;
+; SSE-LABEL: llrint_v4i64_v4f128:
+; SSE: # %bb.0:
+; SSE-NEXT: subq $72, %rsp
+; SSE-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: callq llrintl at PLT
+; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq llrintl at PLT
+; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; SSE-NEXT: # xmm0 = xmm0[0],mem[0]
+; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq llrintl at PLT
+; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq llrintl at PLT
+; SSE-NEXT: movq %rax, %xmm1
+; SSE-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; SSE-NEXT: # xmm1 = xmm1[0],mem[0]
+; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; SSE-NEXT: addq $72, %rsp
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: llrint_v4i64_v4f128:
+; AVX1: # %bb.0:
+; AVX1-NEXT: subq $72, %rsp
+; AVX1-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill
+; AVX1-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT: vmovaps %xmm3, %xmm0
+; AVX1-NEXT: callq llrintl at PLT
+; AVX1-NEXT: vmovq %rax, %xmm0
+; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX1-NEXT: callq llrintl at PLT
+; AVX1-NEXT: vmovq %rax, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX1-NEXT: callq llrintl at PLT
+; AVX1-NEXT: vmovq %rax, %xmm0
+; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX1-NEXT: callq llrintl at PLT
+; AVX1-NEXT: vmovq %rax, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX1-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX1-NEXT: addq $72, %rsp
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: llrint_v4i64_v4f128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: subq $72, %rsp
+; AVX512-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovaps %xmm3, %xmm0
+; AVX512-NEXT: callq llrintl at PLT
+; AVX512-NEXT: vmovq %rax, %xmm0
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX512-NEXT: callq llrintl at PLT
+; AVX512-NEXT: vmovq %rax, %xmm0
+; AVX512-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: callq llrintl at PLT
+; AVX512-NEXT: vmovq %rax, %xmm0
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: callq llrintl at PLT
+; AVX512-NEXT: vmovq %rax, %xmm0
+; AVX512-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512-NEXT: addq $72, %rsp
+; AVX512-NEXT: retq
+;
+; AVX512DQ-LABEL: llrint_v4i64_v4f128:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: subq $72, %rsp
+; AVX512DQ-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps %xmm3, %xmm0
+; AVX512DQ-NEXT: callq llrintl at PLT
+; AVX512DQ-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT: callq llrintl at PLT
+; AVX512DQ-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT: callq llrintl at PLT
+; AVX512DQ-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT: callq llrintl at PLT
+; AVX512DQ-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT: addq $72, %rsp
+; AVX512DQ-NEXT: retq
+ %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>)
+
+define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) nounwind {
+; X86-LABEL: llrint_v8i64_v8f128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $64, %esp
+; X86-NEXT: movl 8(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %edi
+; X86-NEXT: movl 40(%ebp), %ebx
+; X86-NEXT: pushl 24(%ebp)
+; X86-NEXT: pushl 20(%ebp)
+; X86-NEXT: pushl 16(%ebp)
+; X86-NEXT: pushl 12(%ebp)
+; X86-NEXT: calll llrintl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl 32(%ebp)
+; X86-NEXT: pushl 28(%ebp)
+; X86-NEXT: calll llrintl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: pushl 56(%ebp)
+; X86-NEXT: pushl 52(%ebp)
+; X86-NEXT: pushl 48(%ebp)
+; X86-NEXT: pushl 44(%ebp)
+; X86-NEXT: calll llrintl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: pushl 72(%ebp)
+; X86-NEXT: pushl 68(%ebp)
+; X86-NEXT: pushl 64(%ebp)
+; X86-NEXT: pushl 60(%ebp)
+; X86-NEXT: calll llrintl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: pushl 88(%ebp)
+; X86-NEXT: pushl 84(%ebp)
+; X86-NEXT: pushl 80(%ebp)
+; X86-NEXT: pushl 76(%ebp)
+; X86-NEXT: calll llrintl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: pushl 104(%ebp)
+; X86-NEXT: pushl 100(%ebp)
+; X86-NEXT: pushl 96(%ebp)
+; X86-NEXT: pushl 92(%ebp)
+; X86-NEXT: calll llrintl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: pushl 120(%ebp)
+; X86-NEXT: pushl 116(%ebp)
+; X86-NEXT: pushl 112(%ebp)
+; X86-NEXT: pushl 108(%ebp)
+; X86-NEXT: calll llrintl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: pushl 136(%ebp)
+; X86-NEXT: pushl 132(%ebp)
+; X86-NEXT: pushl 128(%ebp)
+; X86-NEXT: pushl 124(%ebp)
+; X86-NEXT: calll llrintl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %edx, 60(%esi)
+; X86-NEXT: movl %eax, 56(%esi)
+; X86-NEXT: movl %ebx, 52(%esi)
+; X86-NEXT: movl %edi, 48(%esi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 44(%esi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 40(%esi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 36(%esi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 32(%esi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 28(%esi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 24(%esi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 20(%esi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 16(%esi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 12(%esi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 8(%esi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 4(%esi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+;
+; SSE-LABEL: llrint_v8i64_v8f128:
+; SSE: # %bb.0:
+; SSE-NEXT: subq $136, %rsp
+; SSE-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps %xmm3, (%rsp) # 16-byte Spill
+; SSE-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: callq llrintl at PLT
+; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq llrintl at PLT
+; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; SSE-NEXT: # xmm0 = xmm0[0],mem[0]
+; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq llrintl at PLT
+; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq llrintl at PLT
+; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload
+; SSE-NEXT: # xmm0 = xmm0[0],mem[0]
+; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq llrintl at PLT
+; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq llrintl at PLT
+; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; SSE-NEXT: # xmm0 = xmm0[0],mem[0]
+; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq llrintl at PLT
+; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq llrintl at PLT
+; SSE-NEXT: movq %rax, %xmm3
+; SSE-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Folded Reload
+; SSE-NEXT: # xmm3 = xmm3[0],mem[0]
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
+; SSE-NEXT: addq $136, %rsp
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: llrint_v8i64_v8f128:
+; AVX1: # %bb.0:
+; AVX1-NEXT: subq $152, %rsp
+; AVX1-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT: vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT: vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT: vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill
+; AVX1-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT: vmovaps %xmm3, %xmm0
+; AVX1-NEXT: callq llrintl at PLT
+; AVX1-NEXT: vmovq %rax, %xmm0
+; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX1-NEXT: callq llrintl at PLT
+; AVX1-NEXT: vmovq %rax, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX1-NEXT: callq llrintl at PLT
+; AVX1-NEXT: vmovq %rax, %xmm0
+; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX1-NEXT: callq llrintl at PLT
+; AVX1-NEXT: vmovq %rax, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX1-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: callq llrintl at PLT
+; AVX1-NEXT: vmovq %rax, %xmm0
+; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX1-NEXT: callq llrintl at PLT
+; AVX1-NEXT: vmovq %rax, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX1-NEXT: callq llrintl at PLT
+; AVX1-NEXT: vmovq %rax, %xmm0
+; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX1-NEXT: callq llrintl at PLT
+; AVX1-NEXT: vmovq %rax, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX1-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm1 # 16-byte Folded Reload
+; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; AVX1-NEXT: addq $152, %rsp
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: llrint_v8i64_v8f128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: subq $152, %rsp
+; AVX512-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovaps %xmm7, %xmm0
+; AVX512-NEXT: callq llrintl at PLT
+; AVX512-NEXT: vmovq %rax, %xmm0
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX512-NEXT: callq llrintl at PLT
+; AVX512-NEXT: vmovq %rax, %xmm0
+; AVX512-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: callq llrintl at PLT
+; AVX512-NEXT: vmovq %rax, %xmm0
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: callq llrintl at PLT
+; AVX512-NEXT: vmovq %rax, %xmm0
+; AVX512-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq llrintl at PLT
+; AVX512-NEXT: vmovq %rax, %xmm0
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: callq llrintl at PLT
+; AVX512-NEXT: vmovq %rax, %xmm0
+; AVX512-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: callq llrintl at PLT
+; AVX512-NEXT: vmovq %rax, %xmm0
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: callq llrintl at PLT
+; AVX512-NEXT: vmovq %rax, %xmm0
+; AVX512-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
+; AVX512-NEXT: addq $152, %rsp
+; AVX512-NEXT: retq
+;
+; AVX512DQ-LABEL: llrint_v8i64_v8f128:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: subq $152, %rsp
+; AVX512DQ-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps %xmm7, %xmm0
+; AVX512DQ-NEXT: callq llrintl at PLT
+; AVX512DQ-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT: callq llrintl at PLT
+; AVX512DQ-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT: callq llrintl at PLT
+; AVX512DQ-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT: callq llrintl at PLT
+; AVX512DQ-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512DQ-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: callq llrintl at PLT
+; AVX512DQ-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT: callq llrintl at PLT
+; AVX512DQ-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT: callq llrintl at PLT
+; AVX512DQ-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT: callq llrintl at PLT
+; AVX512DQ-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
+; AVX512DQ-NEXT: addq $152, %rsp
+; AVX512DQ-NEXT: retq
+ %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128>)
diff --git a/llvm/test/CodeGen/X86/vector-lrint-f16.ll b/llvm/test/CodeGen/X86/vector-lrint-f16.ll
index 1316f808aa27e..fa3aeb09eae6f 100644
--- a/llvm/test/CodeGen/X86/vector-lrint-f16.ll
+++ b/llvm/test/CodeGen/X86/vector-lrint-f16.ll
@@ -8,7 +8,7 @@
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx2,f16c | FileCheck %s --check-prefixes=X64-AVX-I32
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=X64-FP16-I32
-define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
+define <1 x iXLen> @lrint_v1f16(<1 x half> %x) nounwind {
; X86-AVX-I16-LABEL: lrint_v1f16:
; X86-AVX-I16: # %bb.0:
; X86-AVX-I16-NEXT: vcvtph2ps %xmm0, %xmm0
@@ -73,7 +73,7 @@ define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
}
declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>)
-define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
+define <2 x iXLen> @lrint_v2f16(<2 x half> %x) nounwind {
; X86-AVX-I16-LABEL: lrint_v2f16:
; X86-AVX-I16: # %bb.0:
; X86-AVX-I16-NEXT: vpsrld $16, %xmm0, %xmm1
@@ -250,7 +250,7 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
}
declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>)
-define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
+define <4 x iXLen> @lrint_v4f16(<4 x half> %x) nounwind {
; X86-AVX-I16-LABEL: lrint_v4f16:
; X86-AVX-I16: # %bb.0:
; X86-AVX-I16-NEXT: vpsrld $16, %xmm0, %xmm1
@@ -455,7 +455,7 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
}
declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>)
-define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
+define <8 x iXLen> @lrint_v8f16(<8 x half> %x) nounwind {
; X86-AVX-I16-LABEL: lrint_v8f16:
; X86-AVX-I16: # %bb.0:
; X86-AVX-I16-NEXT: vpsrld $16, %xmm0, %xmm1
@@ -718,7 +718,7 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
}
declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>)
-define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
+define <16 x iXLen> @lrint_v16f16(<16 x half> %x) nounwind {
; X86-AVX-I16-LABEL: lrint_v16f16:
; X86-AVX-I16: # %bb.0:
; X86-AVX-I16-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -1211,7 +1211,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
}
declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>)
-define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
+define <32 x iXLen> @lrint_v32f32(<32 x half> %x) nounwind {
; X86-AVX-I16-LABEL: lrint_v32f32:
; X86-AVX-I16: # %bb.0:
; X86-AVX-I16-NEXT: vextracti128 $1, %ymm0, %xmm2
diff --git a/llvm/test/CodeGen/X86/vector-lrint.ll b/llvm/test/CodeGen/X86/vector-lrint.ll
index b1c8d46f497f3..b3e5a0929b7a5 100644
--- a/llvm/test/CodeGen/X86/vector-lrint.ll
+++ b/llvm/test/CodeGen/X86/vector-lrint.ll
@@ -1,4 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown | FileCheck %s --check-prefix=X86-I32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=i686-unknown | FileCheck %s --check-prefix=X86-I64
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=X86-SSE2
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X86-AVX,AVX512-i32
@@ -10,7 +12,30 @@
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64-AVX-i64,AVX512-i64
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=X64-AVX-i64,AVX512DQ-i64
-define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
+define <1 x iXLen> @lrint_v1f32(<1 x float> %x) nounwind {
+; X86-I32-LABEL: lrint_v1f32:
+; X86-I32: # %bb.0:
+; X86-I32-NEXT: pushl %eax
+; X86-I32-NEXT: flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl (%esp)
+; X86-I32-NEXT: movl (%esp), %eax
+; X86-I32-NEXT: popl %ecx
+; X86-I32-NEXT: retl
+;
+; X86-I64-LABEL: lrint_v1f32:
+; X86-I64: # %bb.0:
+; X86-I64-NEXT: pushl %ebp
+; X86-I64-NEXT: movl %esp, %ebp
+; X86-I64-NEXT: andl $-8, %esp
+; X86-I64-NEXT: subl $8, %esp
+; X86-I64-NEXT: flds 8(%ebp)
+; X86-I64-NEXT: fistpll (%esp)
+; X86-I64-NEXT: movl (%esp), %eax
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-I64-NEXT: movl %ebp, %esp
+; X86-I64-NEXT: popl %ebp
+; X86-I64-NEXT: retl
+;
; X86-SSE2-LABEL: lrint_v1f32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: cvtss2si {{[0-9]+}}(%esp), %eax
@@ -35,7 +60,46 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
}
declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>)
-define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
+define <2 x iXLen> @lrint_v2f32(<2 x float> %x) nounwind {
+; X86-I32-LABEL: lrint_v2f32:
+; X86-I32: # %bb.0:
+; X86-I32-NEXT: subl $8, %esp
+; X86-I32-NEXT: flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT: flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl (%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: movl (%esp), %eax
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-I32-NEXT: addl $8, %esp
+; X86-I32-NEXT: retl
+;
+; X86-I64-LABEL: lrint_v2f32:
+; X86-I64: # %bb.0:
+; X86-I64-NEXT: pushl %ebp
+; X86-I64-NEXT: movl %esp, %ebp
+; X86-I64-NEXT: pushl %edi
+; X86-I64-NEXT: pushl %esi
+; X86-I64-NEXT: andl $-8, %esp
+; X86-I64-NEXT: subl $16, %esp
+; X86-I64-NEXT: movl 8(%ebp), %eax
+; X86-I64-NEXT: flds 16(%ebp)
+; X86-I64-NEXT: flds 12(%ebp)
+; X86-I64-NEXT: fistpll (%esp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: movl (%esp), %ecx
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-I64-NEXT: movl %edi, 12(%eax)
+; X86-I64-NEXT: movl %esi, 8(%eax)
+; X86-I64-NEXT: movl %edx, 4(%eax)
+; X86-I64-NEXT: movl %ecx, (%eax)
+; X86-I64-NEXT: leal -8(%ebp), %esp
+; X86-I64-NEXT: popl %esi
+; X86-I64-NEXT: popl %edi
+; X86-I64-NEXT: popl %ebp
+; X86-I64-NEXT: retl $4
+;
; X86-SSE2-LABEL: lrint_v2f32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: cvtps2dq %xmm0, %xmm0
@@ -80,7 +144,81 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
}
declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>)
-define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
+define <4 x iXLen> @lrint_v4f32(<4 x float> %x) nounwind {
+; X86-I32-LABEL: lrint_v4f32:
+; X86-I32: # %bb.0:
+; X86-I32-NEXT: pushl %edi
+; X86-I32-NEXT: pushl %esi
+; X86-I32-NEXT: subl $16, %esp
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-I32-NEXT: flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT: flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT: flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT: flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl (%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: movl (%esp), %ecx
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-I32-NEXT: movl %edi, 12(%eax)
+; X86-I32-NEXT: movl %esi, 8(%eax)
+; X86-I32-NEXT: movl %edx, 4(%eax)
+; X86-I32-NEXT: movl %ecx, (%eax)
+; X86-I32-NEXT: addl $16, %esp
+; X86-I32-NEXT: popl %esi
+; X86-I32-NEXT: popl %edi
+; X86-I32-NEXT: retl $4
+;
+; X86-I64-LABEL: lrint_v4f32:
+; X86-I64: # %bb.0:
+; X86-I64-NEXT: pushl %ebp
+; X86-I64-NEXT: movl %esp, %ebp
+; X86-I64-NEXT: pushl %ebx
+; X86-I64-NEXT: pushl %edi
+; X86-I64-NEXT: pushl %esi
+; X86-I64-NEXT: andl $-8, %esp
+; X86-I64-NEXT: subl $56, %esp
+; X86-I64-NEXT: movl 8(%ebp), %eax
+; X86-I64-NEXT: flds 24(%ebp)
+; X86-I64-NEXT: flds 20(%ebp)
+; X86-I64-NEXT: flds 16(%ebp)
+; X86-I64-NEXT: flds 12(%ebp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-I64-NEXT: movl %esi, 28(%eax)
+; X86-I64-NEXT: movl %ecx, 24(%eax)
+; X86-I64-NEXT: movl %edx, 20(%eax)
+; X86-I64-NEXT: movl %ebx, 16(%eax)
+; X86-I64-NEXT: movl %edi, 12(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 8(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 4(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, (%eax)
+; X86-I64-NEXT: leal -12(%ebp), %esp
+; X86-I64-NEXT: popl %esi
+; X86-I64-NEXT: popl %edi
+; X86-I64-NEXT: popl %ebx
+; X86-I64-NEXT: popl %ebp
+; X86-I64-NEXT: retl $4
+;
; X86-SSE2-LABEL: lrint_v4f32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: cvtps2dq %xmm0, %xmm0
@@ -141,7 +279,145 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
}
declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>)
-define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
+define <8 x iXLen> @lrint_v8f32(<8 x float> %x) nounwind {
+; X86-I32-LABEL: lrint_v8f32:
+; X86-I32: # %bb.0:
+; X86-I32-NEXT: pushl %ebp
+; X86-I32-NEXT: pushl %ebx
+; X86-I32-NEXT: pushl %edi
+; X86-I32-NEXT: pushl %esi
+; X86-I32-NEXT: subl $40, %esp
+; X86-I32-NEXT: flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I32-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-I32-NEXT: movl %edx, 28(%eax)
+; X86-I32-NEXT: movl %ecx, 24(%eax)
+; X86-I32-NEXT: movl %ebp, 20(%eax)
+; X86-I32-NEXT: movl %ebx, 16(%eax)
+; X86-I32-NEXT: movl %edi, 12(%eax)
+; X86-I32-NEXT: movl %esi, 8(%eax)
+; X86-I32-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-I32-NEXT: movl %ecx, 4(%eax)
+; X86-I32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I32-NEXT: movl %ecx, (%eax)
+; X86-I32-NEXT: addl $40, %esp
+; X86-I32-NEXT: popl %esi
+; X86-I32-NEXT: popl %edi
+; X86-I32-NEXT: popl %ebx
+; X86-I32-NEXT: popl %ebp
+; X86-I32-NEXT: retl $4
+;
+; X86-I64-LABEL: lrint_v8f32:
+; X86-I64: # %bb.0:
+; X86-I64-NEXT: pushl %ebp
+; X86-I64-NEXT: movl %esp, %ebp
+; X86-I64-NEXT: pushl %ebx
+; X86-I64-NEXT: pushl %edi
+; X86-I64-NEXT: pushl %esi
+; X86-I64-NEXT: andl $-8, %esp
+; X86-I64-NEXT: subl $120, %esp
+; X86-I64-NEXT: flds 12(%ebp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: flds 16(%ebp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: flds 20(%ebp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: flds 24(%ebp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: flds 28(%ebp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: flds 32(%ebp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: flds 36(%ebp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: flds 40(%ebp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: movl 8(%ebp), %eax
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-I64-NEXT: movl %ebx, 60(%eax)
+; X86-I64-NEXT: movl %ecx, 56(%eax)
+; X86-I64-NEXT: movl %edx, 52(%eax)
+; X86-I64-NEXT: movl %esi, 48(%eax)
+; X86-I64-NEXT: movl %edi, 44(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 40(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 36(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 32(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 28(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 24(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 20(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 16(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 12(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 8(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 4(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, (%eax)
+; X86-I64-NEXT: leal -12(%ebp), %esp
+; X86-I64-NEXT: popl %esi
+; X86-I64-NEXT: popl %edi
+; X86-I64-NEXT: popl %ebx
+; X86-I64-NEXT: popl %ebp
+; X86-I64-NEXT: retl $4
+;
; X86-SSE2-LABEL: lrint_v8f32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: cvtps2dq %xmm0, %xmm0
@@ -235,13 +511,36 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
}
declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>)
-define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x) {
+define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x) nounwind {
%a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x)
ret <16 x iXLen> %a
}
declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)
-define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
+define <1 x iXLen> @lrint_v1f64(<1 x double> %x) nounwind {
+; X86-I32-LABEL: lrint_v1f64:
+; X86-I32: # %bb.0:
+; X86-I32-NEXT: pushl %eax
+; X86-I32-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl (%esp)
+; X86-I32-NEXT: movl (%esp), %eax
+; X86-I32-NEXT: popl %ecx
+; X86-I32-NEXT: retl
+;
+; X86-I64-LABEL: lrint_v1f64:
+; X86-I64: # %bb.0:
+; X86-I64-NEXT: pushl %ebp
+; X86-I64-NEXT: movl %esp, %ebp
+; X86-I64-NEXT: andl $-8, %esp
+; X86-I64-NEXT: subl $8, %esp
+; X86-I64-NEXT: fldl 8(%ebp)
+; X86-I64-NEXT: fistpll (%esp)
+; X86-I64-NEXT: movl (%esp), %eax
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-I64-NEXT: movl %ebp, %esp
+; X86-I64-NEXT: popl %ebp
+; X86-I64-NEXT: retl
+;
; X86-SSE2-LABEL: lrint_v1f64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: cvtsd2si {{[0-9]+}}(%esp), %eax
@@ -266,7 +565,46 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
}
declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
-define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
+define <2 x iXLen> @lrint_v2f64(<2 x double> %x) nounwind {
+; X86-I32-LABEL: lrint_v2f64:
+; X86-I32: # %bb.0:
+; X86-I32-NEXT: subl $8, %esp
+; X86-I32-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl (%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: movl (%esp), %eax
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-I32-NEXT: addl $8, %esp
+; X86-I32-NEXT: retl
+;
+; X86-I64-LABEL: lrint_v2f64:
+; X86-I64: # %bb.0:
+; X86-I64-NEXT: pushl %ebp
+; X86-I64-NEXT: movl %esp, %ebp
+; X86-I64-NEXT: pushl %edi
+; X86-I64-NEXT: pushl %esi
+; X86-I64-NEXT: andl $-8, %esp
+; X86-I64-NEXT: subl $16, %esp
+; X86-I64-NEXT: movl 8(%ebp), %eax
+; X86-I64-NEXT: fldl 20(%ebp)
+; X86-I64-NEXT: fldl 12(%ebp)
+; X86-I64-NEXT: fistpll (%esp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: movl (%esp), %ecx
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-I64-NEXT: movl %edi, 12(%eax)
+; X86-I64-NEXT: movl %esi, 8(%eax)
+; X86-I64-NEXT: movl %edx, 4(%eax)
+; X86-I64-NEXT: movl %ecx, (%eax)
+; X86-I64-NEXT: leal -8(%ebp), %esp
+; X86-I64-NEXT: popl %esi
+; X86-I64-NEXT: popl %edi
+; X86-I64-NEXT: popl %ebp
+; X86-I64-NEXT: retl $4
+;
; X86-SSE2-LABEL: lrint_v2f64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: cvtpd2dq %xmm0, %xmm0
@@ -311,7 +649,81 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
}
declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
-define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
+define <4 x iXLen> @lrint_v4f64(<4 x double> %x) nounwind {
+; X86-I32-LABEL: lrint_v4f64:
+; X86-I32: # %bb.0:
+; X86-I32-NEXT: pushl %edi
+; X86-I32-NEXT: pushl %esi
+; X86-I32-NEXT: subl $16, %esp
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-I32-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl (%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: movl (%esp), %ecx
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-I32-NEXT: movl %edi, 12(%eax)
+; X86-I32-NEXT: movl %esi, 8(%eax)
+; X86-I32-NEXT: movl %edx, 4(%eax)
+; X86-I32-NEXT: movl %ecx, (%eax)
+; X86-I32-NEXT: addl $16, %esp
+; X86-I32-NEXT: popl %esi
+; X86-I32-NEXT: popl %edi
+; X86-I32-NEXT: retl $4
+;
+; X86-I64-LABEL: lrint_v4f64:
+; X86-I64: # %bb.0:
+; X86-I64-NEXT: pushl %ebp
+; X86-I64-NEXT: movl %esp, %ebp
+; X86-I64-NEXT: pushl %ebx
+; X86-I64-NEXT: pushl %edi
+; X86-I64-NEXT: pushl %esi
+; X86-I64-NEXT: andl $-8, %esp
+; X86-I64-NEXT: subl $56, %esp
+; X86-I64-NEXT: movl 8(%ebp), %eax
+; X86-I64-NEXT: fldl 36(%ebp)
+; X86-I64-NEXT: fldl 28(%ebp)
+; X86-I64-NEXT: fldl 20(%ebp)
+; X86-I64-NEXT: fldl 12(%ebp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-I64-NEXT: movl %esi, 28(%eax)
+; X86-I64-NEXT: movl %ecx, 24(%eax)
+; X86-I64-NEXT: movl %edx, 20(%eax)
+; X86-I64-NEXT: movl %ebx, 16(%eax)
+; X86-I64-NEXT: movl %edi, 12(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 8(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 4(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, (%eax)
+; X86-I64-NEXT: leal -12(%ebp), %esp
+; X86-I64-NEXT: popl %esi
+; X86-I64-NEXT: popl %edi
+; X86-I64-NEXT: popl %ebx
+; X86-I64-NEXT: popl %ebp
+; X86-I64-NEXT: retl $4
+;
; X86-SSE2-LABEL: lrint_v4f64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: cvtpd2dq %xmm1, %xmm1
@@ -376,14 +788,149 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
}
declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>)
-define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
+define <8 x iXLen> @lrint_v8f64(<8 x double> %x) nounwind {
+; X86-I32-LABEL: lrint_v8f64:
+; X86-I32: # %bb.0:
+; X86-I32-NEXT: pushl %ebp
+; X86-I32-NEXT: pushl %ebx
+; X86-I32-NEXT: pushl %edi
+; X86-I32-NEXT: pushl %esi
+; X86-I32-NEXT: subl $40, %esp
+; X86-I32-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I32-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-I32-NEXT: movl %edx, 28(%eax)
+; X86-I32-NEXT: movl %ecx, 24(%eax)
+; X86-I32-NEXT: movl %ebp, 20(%eax)
+; X86-I32-NEXT: movl %ebx, 16(%eax)
+; X86-I32-NEXT: movl %edi, 12(%eax)
+; X86-I32-NEXT: movl %esi, 8(%eax)
+; X86-I32-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-I32-NEXT: movl %ecx, 4(%eax)
+; X86-I32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I32-NEXT: movl %ecx, (%eax)
+; X86-I32-NEXT: addl $40, %esp
+; X86-I32-NEXT: popl %esi
+; X86-I32-NEXT: popl %edi
+; X86-I32-NEXT: popl %ebx
+; X86-I32-NEXT: popl %ebp
+; X86-I32-NEXT: retl $4
+;
+; X86-I64-LABEL: lrint_v8f64:
+; X86-I64: # %bb.0:
+; X86-I64-NEXT: pushl %ebp
+; X86-I64-NEXT: movl %esp, %ebp
+; X86-I64-NEXT: pushl %ebx
+; X86-I64-NEXT: pushl %edi
+; X86-I64-NEXT: pushl %esi
+; X86-I64-NEXT: andl $-8, %esp
+; X86-I64-NEXT: subl $120, %esp
+; X86-I64-NEXT: fldl 12(%ebp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: fldl 20(%ebp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: fldl 28(%ebp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: fldl 36(%ebp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: fldl 44(%ebp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: fldl 52(%ebp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: fldl 60(%ebp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: fldl 68(%ebp)
+; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT: movl 8(%ebp), %eax
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-I64-NEXT: movl %ebx, 60(%eax)
+; X86-I64-NEXT: movl %ecx, 56(%eax)
+; X86-I64-NEXT: movl %edx, 52(%eax)
+; X86-I64-NEXT: movl %esi, 48(%eax)
+; X86-I64-NEXT: movl %edi, 44(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 40(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 36(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 32(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 28(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 24(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 20(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 16(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 12(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 8(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, 4(%eax)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT: movl %ecx, (%eax)
+; X86-I64-NEXT: leal -12(%ebp), %esp
+; X86-I64-NEXT: popl %esi
+; X86-I64-NEXT: popl %edi
+; X86-I64-NEXT: popl %ebx
+; X86-I64-NEXT: popl %ebp
+; X86-I64-NEXT: retl $4
+;
; X86-SSE2-LABEL: lrint_v8f64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT: .cfi_offset %ebp, -8
; X86-SSE2-NEXT: movl %esp, %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: cvtpd2dq %xmm1, %xmm1
@@ -394,7 +941,6 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
; X86-SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: lrint_v8f64:
@@ -490,3 +1036,1145 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
ret <8 x iXLen> %a
}
declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)
+
+define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) nounwind {
+; X86-I32-LABEL: lrint_v1fp128:
+; X86-I32: # %bb.0:
+; X86-I32-NEXT: pushl %ebp
+; X86-I32-NEXT: movl %esp, %ebp
+; X86-I32-NEXT: andl $-16, %esp
+; X86-I32-NEXT: subl $16, %esp
+; X86-I32-NEXT: pushl 20(%ebp)
+; X86-I32-NEXT: pushl 16(%ebp)
+; X86-I32-NEXT: pushl 12(%ebp)
+; X86-I32-NEXT: pushl 8(%ebp)
+; X86-I32-NEXT: calll lrintl
+; X86-I32-NEXT: addl $16, %esp
+; X86-I32-NEXT: movl %ebp, %esp
+; X86-I32-NEXT: popl %ebp
+; X86-I32-NEXT: retl
+;
+; X86-I64-LABEL: lrint_v1fp128:
+; X86-I64: # %bb.0:
+; X86-I64-NEXT: pushl %ebp
+; X86-I64-NEXT: movl %esp, %ebp
+; X86-I64-NEXT: andl $-16, %esp
+; X86-I64-NEXT: subl $16, %esp
+; X86-I64-NEXT: pushl 20(%ebp)
+; X86-I64-NEXT: pushl 16(%ebp)
+; X86-I64-NEXT: pushl 12(%ebp)
+; X86-I64-NEXT: pushl 8(%ebp)
+; X86-I64-NEXT: calll lrintl
+; X86-I64-NEXT: addl $16, %esp
+; X86-I64-NEXT: movl %ebp, %esp
+; X86-I64-NEXT: popl %ebp
+; X86-I64-NEXT: retl
+;
+; X86-SSE2-LABEL: lrint_v1fp128:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: pushl 20(%ebp)
+; X86-SSE2-NEXT: pushl 16(%ebp)
+; X86-SSE2-NEXT: pushl 12(%ebp)
+; X86-SSE2-NEXT: pushl 8(%ebp)
+; X86-SSE2-NEXT: calll lrintl
+; X86-SSE2-NEXT: addl $16, %esp
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX-LABEL: lrint_v1fp128:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: pushl %ebp
+; X86-AVX-NEXT: movl %esp, %ebp
+; X86-AVX-NEXT: andl $-16, %esp
+; X86-AVX-NEXT: subl $32, %esp
+; X86-AVX-NEXT: vmovups 8(%ebp), %xmm0
+; X86-AVX-NEXT: vmovups %xmm0, (%esp)
+; X86-AVX-NEXT: calll lrintl
+; X86-AVX-NEXT: movl %ebp, %esp
+; X86-AVX-NEXT: popl %ebp
+; X86-AVX-NEXT: retl
+;
+; X64-AVX-i32-LABEL: lrint_v1fp128:
+; X64-AVX-i32: # %bb.0:
+; X64-AVX-i32-NEXT: pushq %rax
+; X64-AVX-i32-NEXT: callq lrintl at PLT
+; X64-AVX-i32-NEXT: popq %rcx
+; X64-AVX-i32-NEXT: retq
+;
+; X64-AVX-i64-LABEL: lrint_v1fp128:
+; X64-AVX-i64: # %bb.0:
+; X64-AVX-i64-NEXT: pushq %rax
+; X64-AVX-i64-NEXT: callq lrintl at PLT
+; X64-AVX-i64-NEXT: popq %rcx
+; X64-AVX-i64-NEXT: retq
+ %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128> %x)
+ ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>)
+
+define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) nounwind {
+; X86-I32-LABEL: lrint_v2fp128:
+; X86-I32: # %bb.0:
+; X86-I32-NEXT: pushl %ebp
+; X86-I32-NEXT: movl %esp, %ebp
+; X86-I32-NEXT: pushl %ebx
+; X86-I32-NEXT: pushl %edi
+; X86-I32-NEXT: pushl %esi
+; X86-I32-NEXT: andl $-16, %esp
+; X86-I32-NEXT: subl $16, %esp
+; X86-I32-NEXT: movl 32(%ebp), %edi
+; X86-I32-NEXT: movl 36(%ebp), %ebx
+; X86-I32-NEXT: pushl 20(%ebp)
+; X86-I32-NEXT: pushl 16(%ebp)
+; X86-I32-NEXT: pushl 12(%ebp)
+; X86-I32-NEXT: pushl 8(%ebp)
+; X86-I32-NEXT: calll lrintl
+; X86-I32-NEXT: addl $16, %esp
+; X86-I32-NEXT: movl %eax, %esi
+; X86-I32-NEXT: pushl %ebx
+; X86-I32-NEXT: pushl %edi
+; X86-I32-NEXT: pushl 28(%ebp)
+; X86-I32-NEXT: pushl 24(%ebp)
+; X86-I32-NEXT: calll lrintl
+; X86-I32-NEXT: addl $16, %esp
+; X86-I32-NEXT: movl %eax, %edx
+; X86-I32-NEXT: movl %esi, %eax
+; X86-I32-NEXT: leal -12(%ebp), %esp
+; X86-I32-NEXT: popl %esi
+; X86-I32-NEXT: popl %edi
+; X86-I32-NEXT: popl %ebx
+; X86-I32-NEXT: popl %ebp
+; X86-I32-NEXT: retl
+;
+; X86-I64-LABEL: lrint_v2fp128:
+; X86-I64: # %bb.0:
+; X86-I64-NEXT: pushl %ebp
+; X86-I64-NEXT: movl %esp, %ebp
+; X86-I64-NEXT: pushl %ebx
+; X86-I64-NEXT: pushl %edi
+; X86-I64-NEXT: pushl %esi
+; X86-I64-NEXT: andl $-16, %esp
+; X86-I64-NEXT: subl $16, %esp
+; X86-I64-NEXT: movl 8(%ebp), %esi
+; X86-I64-NEXT: pushl 24(%ebp)
+; X86-I64-NEXT: pushl 20(%ebp)
+; X86-I64-NEXT: pushl 16(%ebp)
+; X86-I64-NEXT: pushl 12(%ebp)
+; X86-I64-NEXT: calll lrintl
+; X86-I64-NEXT: addl $16, %esp
+; X86-I64-NEXT: movl %eax, %edi
+; X86-I64-NEXT: movl %edx, %ebx
+; X86-I64-NEXT: pushl 40(%ebp)
+; X86-I64-NEXT: pushl 36(%ebp)
+; X86-I64-NEXT: pushl 32(%ebp)
+; X86-I64-NEXT: pushl 28(%ebp)
+; X86-I64-NEXT: calll lrintl
+; X86-I64-NEXT: addl $16, %esp
+; X86-I64-NEXT: movl %edx, 12(%esi)
+; X86-I64-NEXT: movl %eax, 8(%esi)
+; X86-I64-NEXT: movl %ebx, 4(%esi)
+; X86-I64-NEXT: movl %edi, (%esi)
+; X86-I64-NEXT: movl %esi, %eax
+; X86-I64-NEXT: leal -12(%ebp), %esp
+; X86-I64-NEXT: popl %esi
+; X86-I64-NEXT: popl %edi
+; X86-I64-NEXT: popl %ebx
+; X86-I64-NEXT: popl %ebp
+; X86-I64-NEXT: retl $4
+;
+; X86-SSE2-LABEL: lrint_v2fp128:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: pushl %ebx
+; X86-SSE2-NEXT: pushl %edi
+; X86-SSE2-NEXT: pushl %esi
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $32, %esp
+; X86-SSE2-NEXT: movl 12(%ebp), %edi
+; X86-SSE2-NEXT: movl 16(%ebp), %ebx
+; X86-SSE2-NEXT: movl 20(%ebp), %esi
+; X86-SSE2-NEXT: pushl 36(%ebp)
+; X86-SSE2-NEXT: pushl 32(%ebp)
+; X86-SSE2-NEXT: pushl 28(%ebp)
+; X86-SSE2-NEXT: pushl 24(%ebp)
+; X86-SSE2-NEXT: calll lrintl
+; X86-SSE2-NEXT: addl $16, %esp
+; X86-SSE2-NEXT: movd %eax, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, (%esp) # 16-byte Spill
+; X86-SSE2-NEXT: pushl %esi
+; X86-SSE2-NEXT: pushl %ebx
+; X86-SSE2-NEXT: pushl %edi
+; X86-SSE2-NEXT: pushl 8(%ebp)
+; X86-SSE2-NEXT: calll lrintl
+; X86-SSE2-NEXT: addl $16, %esp
+; X86-SSE2-NEXT: movd %eax, %xmm0
+; X86-SSE2-NEXT: punpckldq (%esp), %xmm0 # 16-byte Folded Reload
+; X86-SSE2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; X86-SSE2-NEXT: leal -12(%ebp), %esp
+; X86-SSE2-NEXT: popl %esi
+; X86-SSE2-NEXT: popl %edi
+; X86-SSE2-NEXT: popl %ebx
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX-LABEL: lrint_v2fp128:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: pushl %ebp
+; X86-AVX-NEXT: movl %esp, %ebp
+; X86-AVX-NEXT: andl $-16, %esp
+; X86-AVX-NEXT: subl $48, %esp
+; X86-AVX-NEXT: vmovups 8(%ebp), %xmm0
+; X86-AVX-NEXT: vmovups %xmm0, (%esp)
+; X86-AVX-NEXT: calll lrintl
+; X86-AVX-NEXT: vmovups 24(%ebp), %xmm0
+; X86-AVX-NEXT: vmovups %xmm0, (%esp)
+; X86-AVX-NEXT: vmovd %eax, %xmm0
+; X86-AVX-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX-NEXT: calll lrintl
+; X86-AVX-NEXT: vmovdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; X86-AVX-NEXT: movl %ebp, %esp
+; X86-AVX-NEXT: popl %ebp
+; X86-AVX-NEXT: retl
+;
+; X64-AVX-i32-LABEL: lrint_v2fp128:
+; X64-AVX-i32: # %bb.0:
+; X64-AVX-i32-NEXT: pushq %rbx
+; X64-AVX-i32-NEXT: subq $16, %rsp
+; X64-AVX-i32-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX-i32-NEXT: vmovaps %xmm1, %xmm0
+; X64-AVX-i32-NEXT: callq lrintl at PLT
+; X64-AVX-i32-NEXT: movl %eax, %ebx
+; X64-AVX-i32-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; X64-AVX-i32-NEXT: callq lrintl at PLT
+; X64-AVX-i32-NEXT: vmovd %eax, %xmm0
+; X64-AVX-i32-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0
+; X64-AVX-i32-NEXT: addq $16, %rsp
+; X64-AVX-i32-NEXT: popq %rbx
+; X64-AVX-i32-NEXT: retq
+;
+; X64-AVX-i64-LABEL: lrint_v2fp128:
+; X64-AVX-i64: # %bb.0:
+; X64-AVX-i64-NEXT: subq $40, %rsp
+; X64-AVX-i64-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX-i64-NEXT: vmovaps %xmm1, %xmm0
+; X64-AVX-i64-NEXT: callq lrintl at PLT
+; X64-AVX-i64-NEXT: vmovq %rax, %xmm0
+; X64-AVX-i64-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX-i64-NEXT: callq lrintl at PLT
+; X64-AVX-i64-NEXT: vmovq %rax, %xmm0
+; X64-AVX-i64-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX-i64-NEXT: # xmm0 = xmm0[0],mem[0]
+; X64-AVX-i64-NEXT: addq $40, %rsp
+; X64-AVX-i64-NEXT: retq
+ %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128> %x)
+ ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>)
+
+define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) nounwind {
+; X86-I32-LABEL: lrint_v4fp128:
+; X86-I32: # %bb.0:
+; X86-I32-NEXT: pushl %ebp
+; X86-I32-NEXT: movl %esp, %ebp
+; X86-I32-NEXT: pushl %ebx
+; X86-I32-NEXT: pushl %edi
+; X86-I32-NEXT: pushl %esi
+; X86-I32-NEXT: andl $-16, %esp
+; X86-I32-NEXT: subl $16, %esp
+; X86-I32-NEXT: movl 8(%ebp), %esi
+; X86-I32-NEXT: movl 36(%ebp), %ebx
+; X86-I32-NEXT: movl 40(%ebp), %edi
+; X86-I32-NEXT: pushl 24(%ebp)
+; X86-I32-NEXT: pushl 20(%ebp)
+; X86-I32-NEXT: pushl 16(%ebp)
+; X86-I32-NEXT: pushl 12(%ebp)
+; X86-I32-NEXT: calll lrintl
+; X86-I32-NEXT: addl $16, %esp
+; X86-I32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I32-NEXT: pushl %edi
+; X86-I32-NEXT: pushl %ebx
+; X86-I32-NEXT: pushl 32(%ebp)
+; X86-I32-NEXT: pushl 28(%ebp)
+; X86-I32-NEXT: calll lrintl
+; X86-I32-NEXT: addl $16, %esp
+; X86-I32-NEXT: movl %eax, %ebx
+; X86-I32-NEXT: pushl 56(%ebp)
+; X86-I32-NEXT: pushl 52(%ebp)
+; X86-I32-NEXT: pushl 48(%ebp)
+; X86-I32-NEXT: pushl 44(%ebp)
+; X86-I32-NEXT: calll lrintl
+; X86-I32-NEXT: addl $16, %esp
+; X86-I32-NEXT: movl %eax, %edi
+; X86-I32-NEXT: pushl 72(%ebp)
+; X86-I32-NEXT: pushl 68(%ebp)
+; X86-I32-NEXT: pushl 64(%ebp)
+; X86-I32-NEXT: pushl 60(%ebp)
+; X86-I32-NEXT: calll lrintl
+; X86-I32-NEXT: addl $16, %esp
+; X86-I32-NEXT: movl %eax, 12(%esi)
+; X86-I32-NEXT: movl %edi, 8(%esi)
+; X86-I32-NEXT: movl %ebx, 4(%esi)
+; X86-I32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I32-NEXT: movl %eax, (%esi)
+; X86-I32-NEXT: movl %esi, %eax
+; X86-I32-NEXT: leal -12(%ebp), %esp
+; X86-I32-NEXT: popl %esi
+; X86-I32-NEXT: popl %edi
+; X86-I32-NEXT: popl %ebx
+; X86-I32-NEXT: popl %ebp
+; X86-I32-NEXT: retl $4
+;
+; X86-I64-LABEL: lrint_v4fp128:
+; X86-I64: # %bb.0:
+; X86-I64-NEXT: pushl %ebp
+; X86-I64-NEXT: movl %esp, %ebp
+; X86-I64-NEXT: pushl %ebx
+; X86-I64-NEXT: pushl %edi
+; X86-I64-NEXT: pushl %esi
+; X86-I64-NEXT: andl $-16, %esp
+; X86-I64-NEXT: subl $32, %esp
+; X86-I64-NEXT: movl 8(%ebp), %esi
+; X86-I64-NEXT: movl 36(%ebp), %edi
+; X86-I64-NEXT: movl 40(%ebp), %ebx
+; X86-I64-NEXT: pushl 24(%ebp)
+; X86-I64-NEXT: pushl 20(%ebp)
+; X86-I64-NEXT: pushl 16(%ebp)
+; X86-I64-NEXT: pushl 12(%ebp)
+; X86-I64-NEXT: calll lrintl
+; X86-I64-NEXT: addl $16, %esp
+; X86-I64-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: pushl %ebx
+; X86-I64-NEXT: pushl %edi
+; X86-I64-NEXT: pushl 32(%ebp)
+; X86-I64-NEXT: pushl 28(%ebp)
+; X86-I64-NEXT: calll lrintl
+; X86-I64-NEXT: addl $16, %esp
+; X86-I64-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: pushl 56(%ebp)
+; X86-I64-NEXT: pushl 52(%ebp)
+; X86-I64-NEXT: pushl 48(%ebp)
+; X86-I64-NEXT: pushl 44(%ebp)
+; X86-I64-NEXT: calll lrintl
+; X86-I64-NEXT: addl $16, %esp
+; X86-I64-NEXT: movl %eax, %edi
+; X86-I64-NEXT: movl %edx, %ebx
+; X86-I64-NEXT: pushl 72(%ebp)
+; X86-I64-NEXT: pushl 68(%ebp)
+; X86-I64-NEXT: pushl 64(%ebp)
+; X86-I64-NEXT: pushl 60(%ebp)
+; X86-I64-NEXT: calll lrintl
+; X86-I64-NEXT: addl $16, %esp
+; X86-I64-NEXT: movl %edx, 28(%esi)
+; X86-I64-NEXT: movl %eax, 24(%esi)
+; X86-I64-NEXT: movl %ebx, 20(%esi)
+; X86-I64-NEXT: movl %edi, 16(%esi)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT: movl %eax, 12(%esi)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT: movl %eax, 8(%esi)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT: movl %eax, 4(%esi)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT: movl %eax, (%esi)
+; X86-I64-NEXT: movl %esi, %eax
+; X86-I64-NEXT: leal -12(%ebp), %esp
+; X86-I64-NEXT: popl %esi
+; X86-I64-NEXT: popl %edi
+; X86-I64-NEXT: popl %ebx
+; X86-I64-NEXT: popl %ebp
+; X86-I64-NEXT: retl $4
+;
+; X86-SSE2-LABEL: lrint_v4fp128:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: pushl %ebx
+; X86-SSE2-NEXT: pushl %edi
+; X86-SSE2-NEXT: pushl %esi
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $48, %esp
+; X86-SSE2-NEXT: movl 48(%ebp), %edi
+; X86-SSE2-NEXT: movl 52(%ebp), %ebx
+; X86-SSE2-NEXT: pushl 36(%ebp)
+; X86-SSE2-NEXT: pushl 32(%ebp)
+; X86-SSE2-NEXT: pushl 28(%ebp)
+; X86-SSE2-NEXT: pushl 24(%ebp)
+; X86-SSE2-NEXT: calll lrintl
+; X86-SSE2-NEXT: addl $16, %esp
+; X86-SSE2-NEXT: movl %eax, %esi
+; X86-SSE2-NEXT: pushl %ebx
+; X86-SSE2-NEXT: pushl %edi
+; X86-SSE2-NEXT: pushl 44(%ebp)
+; X86-SSE2-NEXT: pushl 40(%ebp)
+; X86-SSE2-NEXT: calll lrintl
+; X86-SSE2-NEXT: addl $16, %esp
+; X86-SSE2-NEXT: movl %eax, %edi
+; X86-SSE2-NEXT: pushl 68(%ebp)
+; X86-SSE2-NEXT: pushl 64(%ebp)
+; X86-SSE2-NEXT: pushl 60(%ebp)
+; X86-SSE2-NEXT: pushl 56(%ebp)
+; X86-SSE2-NEXT: calll lrintl
+; X86-SSE2-NEXT: addl $16, %esp
+; X86-SSE2-NEXT: movd %eax, %xmm0
+; X86-SSE2-NEXT: movd %edi, %xmm1
+; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-SSE2-NEXT: movdqa %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-SSE2-NEXT: movd %esi, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, (%esp) # 16-byte Spill
+; X86-SSE2-NEXT: pushl 20(%ebp)
+; X86-SSE2-NEXT: pushl 16(%ebp)
+; X86-SSE2-NEXT: pushl 12(%ebp)
+; X86-SSE2-NEXT: pushl 8(%ebp)
+; X86-SSE2-NEXT: calll lrintl
+; X86-SSE2-NEXT: addl $16, %esp
+; X86-SSE2-NEXT: movd %eax, %xmm0
+; X86-SSE2-NEXT: punpckldq (%esp), %xmm0 # 16-byte Folded Reload
+; X86-SSE2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; X86-SSE2-NEXT: punpcklqdq {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; X86-SSE2-NEXT: # xmm0 = xmm0[0],mem[0]
+; X86-SSE2-NEXT: leal -12(%ebp), %esp
+; X86-SSE2-NEXT: popl %esi
+; X86-SSE2-NEXT: popl %edi
+; X86-SSE2-NEXT: popl %ebx
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX-LABEL: lrint_v4fp128:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: pushl %ebp
+; X86-AVX-NEXT: movl %esp, %ebp
+; X86-AVX-NEXT: pushl %edi
+; X86-AVX-NEXT: pushl %esi
+; X86-AVX-NEXT: andl $-16, %esp
+; X86-AVX-NEXT: subl $32, %esp
+; X86-AVX-NEXT: vmovups 40(%ebp), %xmm0
+; X86-AVX-NEXT: vmovups %xmm0, (%esp)
+; X86-AVX-NEXT: calll lrintl
+; X86-AVX-NEXT: movl %eax, %esi
+; X86-AVX-NEXT: vmovups 24(%ebp), %xmm0
+; X86-AVX-NEXT: vmovups %xmm0, (%esp)
+; X86-AVX-NEXT: calll lrintl
+; X86-AVX-NEXT: movl %eax, %edi
+; X86-AVX-NEXT: vmovups 8(%ebp), %xmm0
+; X86-AVX-NEXT: vmovups %xmm0, (%esp)
+; X86-AVX-NEXT: calll lrintl
+; X86-AVX-NEXT: vmovups 56(%ebp), %xmm0
+; X86-AVX-NEXT: vmovups %xmm0, (%esp)
+; X86-AVX-NEXT: vmovd %eax, %xmm0
+; X86-AVX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0
+; X86-AVX-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX-NEXT: calll lrintl
+; X86-AVX-NEXT: vmovdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; X86-AVX-NEXT: leal -8(%ebp), %esp
+; X86-AVX-NEXT: popl %esi
+; X86-AVX-NEXT: popl %edi
+; X86-AVX-NEXT: popl %ebp
+; X86-AVX-NEXT: retl
+;
+; X64-AVX-i32-LABEL: lrint_v4fp128:
+; X64-AVX-i32: # %bb.0:
+; X64-AVX-i32-NEXT: pushq %rbx
+; X64-AVX-i32-NEXT: subq $48, %rsp
+; X64-AVX-i32-NEXT: vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX-i32-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX-i32-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX-i32-NEXT: vmovaps %xmm1, %xmm0
+; X64-AVX-i32-NEXT: callq lrintl at PLT
+; X64-AVX-i32-NEXT: movl %eax, %ebx
+; X64-AVX-i32-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; X64-AVX-i32-NEXT: callq lrintl at PLT
+; X64-AVX-i32-NEXT: vmovd %eax, %xmm0
+; X64-AVX-i32-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0
+; X64-AVX-i32-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX-i32-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX-i32-NEXT: callq lrintl at PLT
+; X64-AVX-i32-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; X64-AVX-i32-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
+; X64-AVX-i32-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX-i32-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX-i32-NEXT: callq lrintl at PLT
+; X64-AVX-i32-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX-i32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; X64-AVX-i32-NEXT: addq $48, %rsp
+; X64-AVX-i32-NEXT: popq %rbx
+; X64-AVX-i32-NEXT: retq
+;
+; X64-AVX1-i64-LABEL: lrint_v4fp128:
+; X64-AVX1-i64: # %bb.0:
+; X64-AVX1-i64-NEXT: subq $72, %rsp
+; X64-AVX1-i64-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps %xmm3, %xmm0
+; X64-AVX1-i64-NEXT: callq lrintl at PLT
+; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT: callq lrintl at PLT
+; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-i64-NEXT: # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-i64-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT: callq lrintl at PLT
+; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT: callq lrintl at PLT
+; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-i64-NEXT: # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-i64-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX1-i64-NEXT: addq $72, %rsp
+; X64-AVX1-i64-NEXT: retq
+;
+; AVX512-i64-LABEL: lrint_v4fp128:
+; AVX512-i64: # %bb.0:
+; AVX512-i64-NEXT: subq $72, %rsp
+; AVX512-i64-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill
+; AVX512-i64-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT: vmovaps %xmm3, %xmm0
+; AVX512-i64-NEXT: callq lrintl at PLT
+; AVX512-i64-NEXT: vmovq %rax, %xmm0
+; AVX512-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT: callq lrintl at PLT
+; AVX512-i64-NEXT: vmovq %rax, %xmm0
+; AVX512-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-i64-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512-i64-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT: callq lrintl at PLT
+; AVX512-i64-NEXT: vmovq %rax, %xmm0
+; AVX512-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT: callq lrintl at PLT
+; AVX512-i64-NEXT: vmovq %rax, %xmm0
+; AVX512-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-i64-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512-i64-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512-i64-NEXT: addq $72, %rsp
+; AVX512-i64-NEXT: retq
+;
+; AVX512DQ-i64-LABEL: lrint_v4fp128:
+; AVX512DQ-i64: # %bb.0:
+; AVX512DQ-i64-NEXT: subq $72, %rsp
+; AVX512DQ-i64-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps %xmm3, %xmm0
+; AVX512DQ-i64-NEXT: callq lrintl at PLT
+; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT: callq lrintl at PLT
+; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-i64-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-i64-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT: callq lrintl at PLT
+; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT: callq lrintl at PLT
+; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-i64-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-i64-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512DQ-i64-NEXT: addq $72, %rsp
+; AVX512DQ-i64-NEXT: retq
+ %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> %x)
+ ret <4 x iXLen> %a
+}
+declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>)
+
+define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) nounwind {
+; X86-I32-LABEL: lrint_v8fp128:
+; X86-I32: # %bb.0:
+; X86-I32-NEXT: pushl %ebp
+; X86-I32-NEXT: movl %esp, %ebp
+; X86-I32-NEXT: pushl %ebx
+; X86-I32-NEXT: pushl %edi
+; X86-I32-NEXT: pushl %esi
+; X86-I32-NEXT: andl $-16, %esp
+; X86-I32-NEXT: subl $32, %esp
+; X86-I32-NEXT: movl 8(%ebp), %esi
+; X86-I32-NEXT: movl 36(%ebp), %ebx
+; X86-I32-NEXT: movl 40(%ebp), %edi
+; X86-I32-NEXT: pushl 24(%ebp)
+; X86-I32-NEXT: pushl 20(%ebp)
+; X86-I32-NEXT: pushl 16(%ebp)
+; X86-I32-NEXT: pushl 12(%ebp)
+; X86-I32-NEXT: calll lrintl
+; X86-I32-NEXT: addl $16, %esp
+; X86-I32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I32-NEXT: pushl %edi
+; X86-I32-NEXT: pushl %ebx
+; X86-I32-NEXT: pushl 32(%ebp)
+; X86-I32-NEXT: pushl 28(%ebp)
+; X86-I32-NEXT: calll lrintl
+; X86-I32-NEXT: addl $16, %esp
+; X86-I32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I32-NEXT: pushl 56(%ebp)
+; X86-I32-NEXT: pushl 52(%ebp)
+; X86-I32-NEXT: pushl 48(%ebp)
+; X86-I32-NEXT: pushl 44(%ebp)
+; X86-I32-NEXT: calll lrintl
+; X86-I32-NEXT: addl $16, %esp
+; X86-I32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I32-NEXT: pushl 72(%ebp)
+; X86-I32-NEXT: pushl 68(%ebp)
+; X86-I32-NEXT: pushl 64(%ebp)
+; X86-I32-NEXT: pushl 60(%ebp)
+; X86-I32-NEXT: calll lrintl
+; X86-I32-NEXT: addl $16, %esp
+; X86-I32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I32-NEXT: pushl 88(%ebp)
+; X86-I32-NEXT: pushl 84(%ebp)
+; X86-I32-NEXT: pushl 80(%ebp)
+; X86-I32-NEXT: pushl 76(%ebp)
+; X86-I32-NEXT: calll lrintl
+; X86-I32-NEXT: addl $16, %esp
+; X86-I32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I32-NEXT: pushl 104(%ebp)
+; X86-I32-NEXT: pushl 100(%ebp)
+; X86-I32-NEXT: pushl 96(%ebp)
+; X86-I32-NEXT: pushl 92(%ebp)
+; X86-I32-NEXT: calll lrintl
+; X86-I32-NEXT: addl $16, %esp
+; X86-I32-NEXT: movl %eax, %ebx
+; X86-I32-NEXT: pushl 120(%ebp)
+; X86-I32-NEXT: pushl 116(%ebp)
+; X86-I32-NEXT: pushl 112(%ebp)
+; X86-I32-NEXT: pushl 108(%ebp)
+; X86-I32-NEXT: calll lrintl
+; X86-I32-NEXT: addl $16, %esp
+; X86-I32-NEXT: movl %eax, %edi
+; X86-I32-NEXT: pushl 136(%ebp)
+; X86-I32-NEXT: pushl 132(%ebp)
+; X86-I32-NEXT: pushl 128(%ebp)
+; X86-I32-NEXT: pushl 124(%ebp)
+; X86-I32-NEXT: calll lrintl
+; X86-I32-NEXT: addl $16, %esp
+; X86-I32-NEXT: movl %eax, 28(%esi)
+; X86-I32-NEXT: movl %edi, 24(%esi)
+; X86-I32-NEXT: movl %ebx, 20(%esi)
+; X86-I32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I32-NEXT: movl %eax, 16(%esi)
+; X86-I32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I32-NEXT: movl %eax, 12(%esi)
+; X86-I32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I32-NEXT: movl %eax, 8(%esi)
+; X86-I32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I32-NEXT: movl %eax, 4(%esi)
+; X86-I32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I32-NEXT: movl %eax, (%esi)
+; X86-I32-NEXT: movl %esi, %eax
+; X86-I32-NEXT: leal -12(%ebp), %esp
+; X86-I32-NEXT: popl %esi
+; X86-I32-NEXT: popl %edi
+; X86-I32-NEXT: popl %ebx
+; X86-I32-NEXT: popl %ebp
+; X86-I32-NEXT: retl $4
+;
+; X86-I64-LABEL: lrint_v8fp128:
+; X86-I64: # %bb.0:
+; X86-I64-NEXT: pushl %ebp
+; X86-I64-NEXT: movl %esp, %ebp
+; X86-I64-NEXT: pushl %ebx
+; X86-I64-NEXT: pushl %edi
+; X86-I64-NEXT: pushl %esi
+; X86-I64-NEXT: andl $-16, %esp
+; X86-I64-NEXT: subl $64, %esp
+; X86-I64-NEXT: movl 8(%ebp), %esi
+; X86-I64-NEXT: movl 36(%ebp), %edi
+; X86-I64-NEXT: movl 40(%ebp), %ebx
+; X86-I64-NEXT: pushl 24(%ebp)
+; X86-I64-NEXT: pushl 20(%ebp)
+; X86-I64-NEXT: pushl 16(%ebp)
+; X86-I64-NEXT: pushl 12(%ebp)
+; X86-I64-NEXT: calll lrintl
+; X86-I64-NEXT: addl $16, %esp
+; X86-I64-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: pushl %ebx
+; X86-I64-NEXT: pushl %edi
+; X86-I64-NEXT: pushl 32(%ebp)
+; X86-I64-NEXT: pushl 28(%ebp)
+; X86-I64-NEXT: calll lrintl
+; X86-I64-NEXT: addl $16, %esp
+; X86-I64-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: pushl 56(%ebp)
+; X86-I64-NEXT: pushl 52(%ebp)
+; X86-I64-NEXT: pushl 48(%ebp)
+; X86-I64-NEXT: pushl 44(%ebp)
+; X86-I64-NEXT: calll lrintl
+; X86-I64-NEXT: addl $16, %esp
+; X86-I64-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: pushl 72(%ebp)
+; X86-I64-NEXT: pushl 68(%ebp)
+; X86-I64-NEXT: pushl 64(%ebp)
+; X86-I64-NEXT: pushl 60(%ebp)
+; X86-I64-NEXT: calll lrintl
+; X86-I64-NEXT: addl $16, %esp
+; X86-I64-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: pushl 88(%ebp)
+; X86-I64-NEXT: pushl 84(%ebp)
+; X86-I64-NEXT: pushl 80(%ebp)
+; X86-I64-NEXT: pushl 76(%ebp)
+; X86-I64-NEXT: calll lrintl
+; X86-I64-NEXT: addl $16, %esp
+; X86-I64-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: pushl 104(%ebp)
+; X86-I64-NEXT: pushl 100(%ebp)
+; X86-I64-NEXT: pushl 96(%ebp)
+; X86-I64-NEXT: pushl 92(%ebp)
+; X86-I64-NEXT: calll lrintl
+; X86-I64-NEXT: addl $16, %esp
+; X86-I64-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT: pushl 120(%ebp)
+; X86-I64-NEXT: pushl 116(%ebp)
+; X86-I64-NEXT: pushl 112(%ebp)
+; X86-I64-NEXT: pushl 108(%ebp)
+; X86-I64-NEXT: calll lrintl
+; X86-I64-NEXT: addl $16, %esp
+; X86-I64-NEXT: movl %eax, %edi
+; X86-I64-NEXT: movl %edx, %ebx
+; X86-I64-NEXT: pushl 136(%ebp)
+; X86-I64-NEXT: pushl 132(%ebp)
+; X86-I64-NEXT: pushl 128(%ebp)
+; X86-I64-NEXT: pushl 124(%ebp)
+; X86-I64-NEXT: calll lrintl
+; X86-I64-NEXT: addl $16, %esp
+; X86-I64-NEXT: movl %edx, 60(%esi)
+; X86-I64-NEXT: movl %eax, 56(%esi)
+; X86-I64-NEXT: movl %ebx, 52(%esi)
+; X86-I64-NEXT: movl %edi, 48(%esi)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT: movl %eax, 44(%esi)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT: movl %eax, 40(%esi)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT: movl %eax, 36(%esi)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT: movl %eax, 32(%esi)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT: movl %eax, 28(%esi)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT: movl %eax, 24(%esi)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT: movl %eax, 20(%esi)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT: movl %eax, 16(%esi)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT: movl %eax, 12(%esi)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT: movl %eax, 8(%esi)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT: movl %eax, 4(%esi)
+; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT: movl %eax, (%esi)
+; X86-I64-NEXT: movl %esi, %eax
+; X86-I64-NEXT: leal -12(%ebp), %esp
+; X86-I64-NEXT: popl %esi
+; X86-I64-NEXT: popl %edi
+; X86-I64-NEXT: popl %ebx
+; X86-I64-NEXT: popl %ebp
+; X86-I64-NEXT: retl $4
+;
+; X86-SSE2-LABEL: lrint_v8fp128:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: pushl %ebx
+; X86-SSE2-NEXT: pushl %edi
+; X86-SSE2-NEXT: pushl %esi
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $64, %esp
+; X86-SSE2-NEXT: movl 108(%ebp), %esi
+; X86-SSE2-NEXT: movl 112(%ebp), %edi
+; X86-SSE2-NEXT: movl 116(%ebp), %ebx
+; X86-SSE2-NEXT: pushl 100(%ebp)
+; X86-SSE2-NEXT: pushl 96(%ebp)
+; X86-SSE2-NEXT: pushl 92(%ebp)
+; X86-SSE2-NEXT: pushl 88(%ebp)
+; X86-SSE2-NEXT: calll lrintl
+; X86-SSE2-NEXT: addl $16, %esp
+; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SSE2-NEXT: pushl %ebx
+; X86-SSE2-NEXT: pushl %edi
+; X86-SSE2-NEXT: pushl %esi
+; X86-SSE2-NEXT: pushl 104(%ebp)
+; X86-SSE2-NEXT: calll lrintl
+; X86-SSE2-NEXT: addl $16, %esp
+; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SSE2-NEXT: pushl 132(%ebp)
+; X86-SSE2-NEXT: pushl 128(%ebp)
+; X86-SSE2-NEXT: pushl 124(%ebp)
+; X86-SSE2-NEXT: pushl 120(%ebp)
+; X86-SSE2-NEXT: calll lrintl
+; X86-SSE2-NEXT: addl $16, %esp
+; X86-SSE2-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-SSE2-NEXT: pushl 20(%ebp)
+; X86-SSE2-NEXT: pushl 16(%ebp)
+; X86-SSE2-NEXT: pushl 12(%ebp)
+; X86-SSE2-NEXT: pushl 8(%ebp)
+; X86-SSE2-NEXT: calll lrintl
+; X86-SSE2-NEXT: addl $16, %esp
+; X86-SSE2-NEXT: movl %eax, %esi
+; X86-SSE2-NEXT: pushl 36(%ebp)
+; X86-SSE2-NEXT: pushl 32(%ebp)
+; X86-SSE2-NEXT: pushl 28(%ebp)
+; X86-SSE2-NEXT: pushl 24(%ebp)
+; X86-SSE2-NEXT: calll lrintl
+; X86-SSE2-NEXT: addl $16, %esp
+; X86-SSE2-NEXT: movl %eax, %edi
+; X86-SSE2-NEXT: pushl 52(%ebp)
+; X86-SSE2-NEXT: pushl 48(%ebp)
+; X86-SSE2-NEXT: pushl 44(%ebp)
+; X86-SSE2-NEXT: pushl 40(%ebp)
+; X86-SSE2-NEXT: calll lrintl
+; X86-SSE2-NEXT: addl $16, %esp
+; X86-SSE2-NEXT: movl %eax, %ebx
+; X86-SSE2-NEXT: pushl 68(%ebp)
+; X86-SSE2-NEXT: pushl 64(%ebp)
+; X86-SSE2-NEXT: pushl 60(%ebp)
+; X86-SSE2-NEXT: pushl 56(%ebp)
+; X86-SSE2-NEXT: calll lrintl
+; X86-SSE2-NEXT: addl $16, %esp
+; X86-SSE2-NEXT: movd %eax, %xmm0
+; X86-SSE2-NEXT: movd %ebx, %xmm1
+; X86-SSE2-NEXT: movd %edi, %xmm2
+; X86-SSE2-NEXT: movd %esi, %xmm4
+; X86-SSE2-NEXT: movss (%esp), %xmm3 # 4-byte Reload
+; X86-SSE2-NEXT: # xmm3 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 4-byte Reload
+; X86-SSE2-NEXT: # xmm5 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 4-byte Reload
+; X86-SSE2-NEXT: # xmm6 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: movaps %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
+; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm1[0]
+; X86-SSE2-NEXT: movdqa %xmm4, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-SSE2-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
+; X86-SSE2-NEXT: movaps %xmm5, (%esp) # 16-byte Spill
+; X86-SSE2-NEXT: pushl 84(%ebp)
+; X86-SSE2-NEXT: pushl 80(%ebp)
+; X86-SSE2-NEXT: pushl 76(%ebp)
+; X86-SSE2-NEXT: pushl 72(%ebp)
+; X86-SSE2-NEXT: calll lrintl
+; X86-SSE2-NEXT: addl $16, %esp
+; X86-SSE2-NEXT: movd %eax, %xmm1
+; X86-SSE2-NEXT: punpckldq {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; X86-SSE2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; X86-SSE2-NEXT: punpcklqdq (%esp), %xmm1 # 16-byte Folded Reload
+; X86-SSE2-NEXT: # xmm1 = xmm1[0],mem[0]
+; X86-SSE2-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-SSE2-NEXT: leal -12(%ebp), %esp
+; X86-SSE2-NEXT: popl %esi
+; X86-SSE2-NEXT: popl %edi
+; X86-SSE2-NEXT: popl %ebx
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX1-LABEL: lrint_v8fp128:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: pushl %ebp
+; X86-AVX1-NEXT: movl %esp, %ebp
+; X86-AVX1-NEXT: pushl %ebx
+; X86-AVX1-NEXT: pushl %edi
+; X86-AVX1-NEXT: pushl %esi
+; X86-AVX1-NEXT: andl $-16, %esp
+; X86-AVX1-NEXT: subl $80, %esp
+; X86-AVX1-NEXT: vmovups 40(%ebp), %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, (%esp)
+; X86-AVX1-NEXT: calll lrintl
+; X86-AVX1-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-AVX1-NEXT: vmovups 24(%ebp), %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, (%esp)
+; X86-AVX1-NEXT: calll lrintl
+; X86-AVX1-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-AVX1-NEXT: vmovups 8(%ebp), %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, (%esp)
+; X86-AVX1-NEXT: calll lrintl
+; X86-AVX1-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-AVX1-NEXT: vmovups 120(%ebp), %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, (%esp)
+; X86-AVX1-NEXT: calll lrintl
+; X86-AVX1-NEXT: movl %eax, %esi
+; X86-AVX1-NEXT: vmovups 104(%ebp), %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, (%esp)
+; X86-AVX1-NEXT: calll lrintl
+; X86-AVX1-NEXT: movl %eax, %edi
+; X86-AVX1-NEXT: vmovups 88(%ebp), %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, (%esp)
+; X86-AVX1-NEXT: calll lrintl
+; X86-AVX1-NEXT: movl %eax, %ebx
+; X86-AVX1-NEXT: vmovups 72(%ebp), %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, (%esp)
+; X86-AVX1-NEXT: calll lrintl
+; X86-AVX1-NEXT: vmovd %eax, %xmm0
+; X86-AVX1-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpinsrd $2, %edi, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpinsrd $3, %esi, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vpinsrd $1, {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
+; X86-AVX1-NEXT: vmovups 56(%ebp), %xmm1
+; X86-AVX1-NEXT: vmovups %xmm1, (%esp)
+; X86-AVX1-NEXT: vpinsrd $2, {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
+; X86-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll lrintl
+; X86-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, {{[-0-9]+}}(%e{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X86-AVX1-NEXT: leal -12(%ebp), %esp
+; X86-AVX1-NEXT: popl %esi
+; X86-AVX1-NEXT: popl %edi
+; X86-AVX1-NEXT: popl %ebx
+; X86-AVX1-NEXT: popl %ebp
+; X86-AVX1-NEXT: retl
+;
+; X64-AVX1-i32-LABEL: lrint_v8fp128:
+; X64-AVX1-i32: # %bb.0:
+; X64-AVX1-i32-NEXT: pushq %rbx
+; X64-AVX1-i32-NEXT: subq $112, %rsp
+; X64-AVX1-i32-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT: vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT: vmovaps %xmm4, (%rsp) # 16-byte Spill
+; X64-AVX1-i32-NEXT: vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT: vmovaps %xmm5, %xmm0
+; X64-AVX1-i32-NEXT: callq lrintl at PLT
+; X64-AVX1-i32-NEXT: movl %eax, %ebx
+; X64-AVX1-i32-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT: callq lrintl at PLT
+; X64-AVX1-i32-NEXT: vmovd %eax, %xmm0
+; X64-AVX1-i32-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0
+; X64-AVX1-i32-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-i32-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT: callq lrintl at PLT
+; X64-AVX1-i32-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
+; X64-AVX1-i32-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT: callq lrintl at PLT
+; X64-AVX1-i32-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; X64-AVX1-i32-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT: callq lrintl at PLT
+; X64-AVX1-i32-NEXT: movl %eax, %ebx
+; X64-AVX1-i32-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT: callq lrintl at PLT
+; X64-AVX1-i32-NEXT: vmovd %eax, %xmm0
+; X64-AVX1-i32-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0
+; X64-AVX1-i32-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT: callq lrintl at PLT
+; X64-AVX1-i32-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
+; X64-AVX1-i32-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT: callq lrintl at PLT
+; X64-AVX1-i32-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; X64-AVX1-i32-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX1-i32-NEXT: addq $112, %rsp
+; X64-AVX1-i32-NEXT: popq %rbx
+; X64-AVX1-i32-NEXT: retq
+;
+; X64-AVX1-i64-LABEL: lrint_v8fp128:
+; X64-AVX1-i64: # %bb.0:
+; X64-AVX1-i64-NEXT: subq $152, %rsp
+; X64-AVX1-i64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps %xmm3, %xmm0
+; X64-AVX1-i64-NEXT: callq lrintl at PLT
+; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT: callq lrintl at PLT
+; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-i64-NEXT: # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-i64-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT: callq lrintl at PLT
+; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT: callq lrintl at PLT
+; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-i64-NEXT: # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-i64-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX1-i64-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT: vzeroupper
+; X64-AVX1-i64-NEXT: callq lrintl at PLT
+; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT: callq lrintl at PLT
+; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-i64-NEXT: # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT: callq lrintl at PLT
+; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT: callq lrintl at PLT
+; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-i64-NEXT: # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-i64-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm1 # 16-byte Folded Reload
+; X64-AVX1-i64-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; X64-AVX1-i64-NEXT: addq $152, %rsp
+; X64-AVX1-i64-NEXT: retq
+;
+; AVX512-i64-LABEL: lrint_v8fp128:
+; AVX512-i64: # %bb.0:
+; AVX512-i64-NEXT: subq $152, %rsp
+; AVX512-i64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
+; AVX512-i64-NEXT: vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT: vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT: vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT: vmovaps %xmm7, %xmm0
+; AVX512-i64-NEXT: callq lrintl at PLT
+; AVX512-i64-NEXT: vmovq %rax, %xmm0
+; AVX512-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT: callq lrintl at PLT
+; AVX512-i64-NEXT: vmovq %rax, %xmm0
+; AVX512-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-i64-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512-i64-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT: callq lrintl at PLT
+; AVX512-i64-NEXT: vmovq %rax, %xmm0
+; AVX512-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT: callq lrintl at PLT
+; AVX512-i64-NEXT: vmovq %rax, %xmm0
+; AVX512-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-i64-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512-i64-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512-i64-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT: vzeroupper
+; AVX512-i64-NEXT: callq lrintl at PLT
+; AVX512-i64-NEXT: vmovq %rax, %xmm0
+; AVX512-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT: callq lrintl at PLT
+; AVX512-i64-NEXT: vmovq %rax, %xmm0
+; AVX512-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-i64-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT: callq lrintl at PLT
+; AVX512-i64-NEXT: vmovq %rax, %xmm0
+; AVX512-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT: callq lrintl at PLT
+; AVX512-i64-NEXT: vmovq %rax, %xmm0
+; AVX512-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-i64-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512-i64-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512-i64-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
+; AVX512-i64-NEXT: addq $152, %rsp
+; AVX512-i64-NEXT: retq
+;
+; AVX512DQ-i64-LABEL: lrint_v8fp128:
+; AVX512DQ-i64: # %bb.0:
+; AVX512DQ-i64-NEXT: subq $152, %rsp
+; AVX512DQ-i64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps %xmm7, %xmm0
+; AVX512DQ-i64-NEXT: callq lrintl at PLT
+; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT: callq lrintl at PLT
+; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-i64-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-i64-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT: callq lrintl at PLT
+; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT: callq lrintl at PLT
+; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-i64-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-i64-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512DQ-i64-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT: vzeroupper
+; AVX512DQ-i64-NEXT: callq lrintl at PLT
+; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT: callq lrintl at PLT
+; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-i64-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT: callq lrintl at PLT
+; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT: callq lrintl at PLT
+; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-i64-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-i64-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512DQ-i64-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
+; AVX512DQ-i64-NEXT: addq $152, %rsp
+; AVX512DQ-i64-NEXT: retq
+ %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x)
+ ret <8 x iXLen> %a
+}
+declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>)
>From 4658d1f796093c24d3ca4a42a8244b700c6dffef Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Fri, 8 Aug 2025 05:04:09 -0500
Subject: [PATCH 2/4] [SelectionDAG] Add `f16` soft promotion for `lrint` and
`lround`
On platforms that soft promote `half`, using `lrint` intrinsics crashes
with the following:
SoftPromoteHalfOperand Op #0: t5: i32 = lrint t4
LLVM ERROR: Do not know how to soft promote this operator's operand!
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0. Program arguments: /Users/tmgross/Documents/projects/llvm/llvm-build/bin/llc -mtriple=riscv32
1. Running pass 'Function Pass Manager' on module '<stdin>'.
2. Running pass 'RISC-V DAG->DAG Pattern Instruction Selection' on function '@test_lrint_ixx_f16'
Resolve this by adding a soft promotion.
`SoftPromoteHalfOp_FP_TO_XINT` is reused here since it provides the
correct input and output types. It is renamed `SoftPromoteHalfOp_UnaryOp`
to match `PromoteFloatOp_UnaryOp` and similar functions that are used to
handle the same sets of intrinsics.
---
.../SelectionDAG/LegalizeFloatTypes.cpp | 17 +-
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 +-
llvm/test/CodeGen/ARM/lrint-conv.ll | 15 +-
llvm/test/CodeGen/ARM/vector-lrint.ll | 5189 ++++++++++++++++-
llvm/test/CodeGen/LoongArch/lrint-conv.ll | 33 +-
llvm/test/CodeGen/Mips/llrint-conv.ll | 27 +-
llvm/test/CodeGen/Mips/lrint-conv.ll | 27 +-
llvm/test/CodeGen/RISCV/lrint-conv.ll | 25 +-
llvm/test/CodeGen/X86/lrint-conv-i32.ll | 52 +-
9 files changed, 5297 insertions(+), 90 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 2cad36eff9c88..f84e6c8291cce 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -20,6 +20,7 @@
#include "LegalizeTypes.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -3729,10 +3730,20 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
Res = SoftPromoteHalfOp_FAKE_USE(N, OpNo);
break;
case ISD::FCOPYSIGN: Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::LLRINT:
+ case ISD::LLROUND:
+ case ISD::LRINT:
+ case ISD::LROUND:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break;
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LLROUND:
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LROUND:
+ Res = SoftPromoteHalfOp_UnaryOp(N);
+ break;
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
Res = SoftPromoteHalfOp_FP_TO_XINT_SAT(N); break;
@@ -3811,7 +3822,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), RVT, Op);
}
-SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_UnaryOp(SDNode *N) {
EVT RVT = N->getValueType(0);
bool IsStrict = N->isStrictFPOpcode();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 63544e63e1da1..8eb3cec8bc87a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -840,7 +840,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SoftPromoteHalfOp_FAKE_USE(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N);
- SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N);
+ SDValue SoftPromoteHalfOp_UnaryOp(SDNode *N);
SDValue SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N);
SDValue SoftPromoteHalfOp_SETCC(SDNode *N);
SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo);
diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll
index 9aa95112af533..848b14e48f2d1 100644
--- a/llvm/test/CodeGen/ARM/lrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/lrint-conv.ll
@@ -1,12 +1,15 @@
; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP
; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP
-; FIXME: crash
-; define i32 @testmswh_builtin(half %x) {
-; entry:
-; %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
-; ret i32 %0
-; }
+; SOFTFP-LABEL: testmswh_builtin:
+; SOFTFP: bl lrintf
+; HARDFP-LABEL: testmswh_builtin:
+; HARDFP: bl lrintf
+define i32 @testmswh_builtin(half %x) {
+entry:
+ %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+ ret i32 %0
+}
; SOFTFP-LABEL: testmsws_builtin:
; SOFTFP: bl lrintf
diff --git a/llvm/test/CodeGen/ARM/vector-lrint.ll b/llvm/test/CodeGen/ARM/vector-lrint.ll
index 50c8b9ff6d913..076dc0bfbf4dc 100644
--- a/llvm/test/CodeGen/ARM/vector-lrint.ll
+++ b/llvm/test/CodeGen/ARM/vector-lrint.ll
@@ -9,42 +9,5163 @@
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-I32-NEON
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-I64-NEON
-; FIXME: crash "Do not know how to soft promote this operator's operand!"
-; define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
-; %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
-; ret <1 x iXLen> %a
-; }
-; declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>)
-
-; define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
-; %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x)
-; ret <2 x iXLen> %a
-; }
-; declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>)
-
-; define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
-; %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x)
-; ret <4 x iXLen> %a
-; }
-; declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>)
-
-; define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
-; %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x)
-; ret <8 x iXLen> %a
-; }
-; declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>)
+define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
+; LE-I32-LABEL: lrint_v1f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_f2h
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v1f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r11, lr}
+; LE-I64-NEXT: push {r11, lr}
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_f2h
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d0[0], r0
+; LE-I64-NEXT: vmov.32 d0[1], r1
+; LE-I64-NEXT: pop {r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v1f16:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r11, lr}
+; LE-I32-NEON-NEXT: push {r11, lr}
+; LE-I32-NEON-NEXT: vmov r0, s0
+; LE-I32-NEON-NEXT: bl __aeabi_f2h
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v1f16:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r11, lr}
+; LE-I64-NEON-NEXT: push {r11, lr}
+; LE-I64-NEON-NEXT: vmov r0, s0
+; LE-I64-NEON-NEXT: bl __aeabi_f2h
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d0[0], r0
+; LE-I64-NEON-NEXT: vmov.32 d0[1], r1
+; LE-I64-NEON-NEXT: pop {r11, pc}
+;
+; BE-I32-LABEL: lrint_v1f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_f2h
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v1f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r11, lr}
+; BE-I64-NEXT: push {r11, lr}
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_f2h
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d0, d16
+; BE-I64-NEXT: pop {r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v1f16:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r11, lr}
+; BE-I32-NEON-NEXT: push {r11, lr}
+; BE-I32-NEON-NEXT: vmov r0, s0
+; BE-I32-NEON-NEXT: bl __aeabi_f2h
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v1f16:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r11, lr}
+; BE-I64-NEON-NEXT: push {r11, lr}
+; BE-I64-NEON-NEXT: vmov r0, s0
+; BE-I64-NEON-NEXT: bl __aeabi_f2h
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 d0, d16
+; BE-I64-NEON-NEXT: pop {r11, pc}
+ %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
+ ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>)
-; define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
-; %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x)
-; ret <16 x iXLen> %a
-; }
-; declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>)
+define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
+; LE-I32-LABEL: lrint_v2f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: .vsave {d8}
+; LE-I32-NEXT: vpush {d8}
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: vmov.f32 s16, s1
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov r1, s16
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: mov r0, r1
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: vorr d0, d8, d8
+; LE-I32-NEXT: vpop {d8}
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v2f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r11, lr}
+; LE-I64-NEXT: .vsave {d8, d9}
+; LE-I64-NEXT: vpush {d8, d9}
+; LE-I64-NEXT: vmov r0, s1
+; LE-I64-NEXT: vmov.f32 s16, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r4, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d9[0], r4
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov.32 d9[1], r5
+; LE-I64-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEXT: vorr q0, q4, q4
+; LE-I64-NEXT: vpop {d8, d9}
+; LE-I64-NEXT: pop {r4, r5, r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v2f16:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r11, lr}
+; LE-I32-NEON-NEXT: push {r11, lr}
+; LE-I32-NEON-NEXT: .vsave {d8}
+; LE-I32-NEON-NEXT: vpush {d8}
+; LE-I32-NEON-NEXT: vmov r0, s0
+; LE-I32-NEON-NEXT: vmov.f32 s16, s1
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov r1, s16
+; LE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT: mov r0, r1
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT: vorr d0, d8, d8
+; LE-I32-NEON-NEXT: vpop {d8}
+; LE-I32-NEON-NEXT: pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v2f16:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r4, r5, r11, lr}
+; LE-I64-NEON-NEXT: push {r4, r5, r11, lr}
+; LE-I64-NEON-NEXT: .vsave {d8, d9}
+; LE-I64-NEON-NEXT: vpush {d8, d9}
+; LE-I64-NEON-NEXT: vmov r0, s1
+; LE-I64-NEON-NEXT: vmov.f32 s16, s0
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: mov r4, r0
+; LE-I64-NEON-NEXT: vmov r0, s16
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r4
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r5
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEON-NEXT: vorr q0, q4, q4
+; LE-I64-NEON-NEXT: vpop {d8, d9}
+; LE-I64-NEON-NEXT: pop {r4, r5, r11, pc}
+;
+; BE-I32-LABEL: lrint_v2f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: .vsave {d8}
+; BE-I32-NEXT: vpush {d8}
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: vmov.f32 s16, s1
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov r1, s16
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: mov r0, r1
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: vrev64.32 d0, d8
+; BE-I32-NEXT: vpop {d8}
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v2f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r11, lr}
+; BE-I64-NEXT: .vsave {d8}
+; BE-I64-NEXT: vpush {d8}
+; BE-I64-NEXT: vmov r0, s1
+; BE-I64-NEXT: vmov.f32 s16, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d8[0], r4
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d8[1], r5
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d8
+; BE-I64-NEXT: vrev64.32 d0, d16
+; BE-I64-NEXT: vpop {d8}
+; BE-I64-NEXT: pop {r4, r5, r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v2f16:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r11, lr}
+; BE-I32-NEON-NEXT: push {r11, lr}
+; BE-I32-NEON-NEXT: .vsave {d8}
+; BE-I32-NEON-NEXT: vpush {d8}
+; BE-I32-NEON-NEXT: vmov r0, s0
+; BE-I32-NEON-NEXT: vmov.f32 s16, s1
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov r1, s16
+; BE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT: mov r0, r1
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT: vrev64.32 d0, d8
+; BE-I32-NEON-NEXT: vpop {d8}
+; BE-I32-NEON-NEXT: pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v2f16:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r4, r5, r11, lr}
+; BE-I64-NEON-NEXT: push {r4, r5, r11, lr}
+; BE-I64-NEON-NEXT: .vsave {d8}
+; BE-I64-NEON-NEXT: vpush {d8}
+; BE-I64-NEON-NEXT: vmov r0, s1
+; BE-I64-NEON-NEXT: vmov.f32 s16, s0
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: mov r4, r0
+; BE-I64-NEON-NEXT: vmov r0, s16
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: vmov.32 d8[0], r4
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r5
+; BE-I64-NEON-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 d1, d8
+; BE-I64-NEON-NEXT: vrev64.32 d0, d16
+; BE-I64-NEON-NEXT: vpop {d8}
+; BE-I64-NEON-NEXT: pop {r4, r5, r11, pc}
+ %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x)
+ ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>)
-; define <32 x iXLen> @lrint_v32f16(<32 x half> %x) {
-; %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half> %x)
-; ret <32 x iXLen> %a
-; }
-; declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half>)
+define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
+; LE-I32-LABEL: lrint_v4f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r11, lr}
+; LE-I32-NEXT: push {r4, r5, r11, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11}
+; LE-I32-NEXT: vmov r0, s3
+; LE-I32-NEXT: vmov.f32 s16, s2
+; LE-I32-NEXT: vmov.f32 s18, s1
+; LE-I32-NEXT: vmov.f32 s20, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: vmov r0, s16
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r5, r0
+; LE-I32-NEXT: vmov r0, s20
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r5
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: vmov r0, s18
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: vmov.32 d11[1], r4
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vorr q0, q5, q5
+; LE-I32-NEXT: vpop {d8, d9, d10, d11}
+; LE-I32-NEXT: pop {r4, r5, r11, pc}
+;
+; LE-I64-LABEL: lrint_v4f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r11, lr}
+; LE-I64-NEXT: .vsave {d12, d13}
+; LE-I64-NEXT: vpush {d12, d13}
+; LE-I64-NEXT: .vsave {d8, d9, d10}
+; LE-I64-NEXT: vpush {d8, d9, d10}
+; LE-I64-NEXT: vmov r0, s1
+; LE-I64-NEXT: vmov.f32 s16, s3
+; LE-I64-NEXT: vmov.f32 s20, s2
+; LE-I64-NEXT: vmov.f32 s18, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s18
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r7
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: vmov r0, s20
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d13[0], r5
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov.32 d13[1], r4
+; LE-I64-NEXT: vmov.32 d9[1], r6
+; LE-I64-NEXT: vmov.32 d12[1], r7
+; LE-I64-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEXT: vorr q0, q6, q6
+; LE-I64-NEXT: vorr q1, q4, q4
+; LE-I64-NEXT: vpop {d8, d9, d10}
+; LE-I64-NEXT: vpop {d12, d13}
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v4f16:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r4, r5, r11, lr}
+; LE-I32-NEON-NEXT: push {r4, r5, r11, lr}
+; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11}
+; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11}
+; LE-I32-NEON-NEXT: vmov r0, s3
+; LE-I32-NEON-NEXT: vmov.f32 s16, s2
+; LE-I32-NEON-NEXT: vmov.f32 s18, s1
+; LE-I32-NEON-NEXT: vmov.f32 s20, s0
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: mov r4, r0
+; LE-I32-NEON-NEXT: vmov r0, s16
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: mov r5, r0
+; LE-I32-NEON-NEXT: vmov r0, s20
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov s0, r5
+; LE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT: vmov r0, s18
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: vmov.32 d11[1], r4
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT: vorr q0, q5, q5
+; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11}
+; LE-I32-NEON-NEXT: pop {r4, r5, r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v4f16:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
+; LE-I64-NEON-NEXT: .vsave {d12, d13}
+; LE-I64-NEON-NEXT: vpush {d12, d13}
+; LE-I64-NEON-NEXT: .vsave {d8, d9, d10}
+; LE-I64-NEON-NEXT: vpush {d8, d9, d10}
+; LE-I64-NEON-NEXT: vmov r0, s1
+; LE-I64-NEON-NEXT: vmov.f32 s16, s3
+; LE-I64-NEON-NEXT: vmov.f32 s20, s2
+; LE-I64-NEON-NEXT: vmov.f32 s18, s0
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: mov r5, r0
+; LE-I64-NEON-NEXT: vmov r0, s18
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: mov r7, r0
+; LE-I64-NEON-NEXT: vmov r0, s16
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r7
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: vmov r0, s20
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r5
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r4
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r6
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r7
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEON-NEXT: vorr q0, q6, q6
+; LE-I64-NEON-NEXT: vorr q1, q4, q4
+; LE-I64-NEON-NEXT: vpop {d8, d9, d10}
+; LE-I64-NEON-NEXT: vpop {d12, d13}
+; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
+;
+; BE-I32-LABEL: lrint_v4f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r11, lr}
+; BE-I32-NEXT: push {r4, r5, r11, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11}
+; BE-I32-NEXT: vmov r0, s3
+; BE-I32-NEXT: vmov.f32 s16, s2
+; BE-I32-NEXT: vmov.f32 s18, s1
+; BE-I32-NEXT: vmov.f32 s20, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: vmov r0, s16
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: vmov r0, s20
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r5
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: vmov r0, s18
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: vmov.32 d11[1], r4
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q5
+; BE-I32-NEXT: vpop {d8, d9, d10, d11}
+; BE-I32-NEXT: pop {r4, r5, r11, pc}
+;
+; BE-I64-LABEL: lrint_v4f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r11, lr}
+; BE-I64-NEXT: .vsave {d8, d9, d10}
+; BE-I64-NEXT: vpush {d8, d9, d10}
+; BE-I64-NEXT: vmov r0, s1
+; BE-I64-NEXT: vmov.f32 s16, s3
+; BE-I64-NEXT: vmov.f32 s18, s2
+; BE-I64-NEXT: vmov.f32 s20, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r5, r0
+; BE-I64-NEXT: vmov r0, s20
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r7, r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r7
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: vmov r0, s18
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d9[0], r5
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEXT: vmov.32 d8[1], r6
+; BE-I64-NEXT: vmov.32 d10[1], r7
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d9
+; BE-I64-NEXT: vrev64.32 d3, d8
+; BE-I64-NEXT: vrev64.32 d0, d10
+; BE-I64-NEXT: vrev64.32 d2, d16
+; BE-I64-NEXT: vpop {d8, d9, d10}
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v4f16:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r4, r5, r11, lr}
+; BE-I32-NEON-NEXT: push {r4, r5, r11, lr}
+; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11}
+; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11}
+; BE-I32-NEON-NEXT: vmov r0, s3
+; BE-I32-NEON-NEXT: vmov.f32 s16, s2
+; BE-I32-NEON-NEXT: vmov.f32 s18, s1
+; BE-I32-NEON-NEXT: vmov.f32 s20, s0
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: mov r4, r0
+; BE-I32-NEON-NEXT: vmov r0, s16
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: mov r5, r0
+; BE-I32-NEON-NEXT: vmov r0, s20
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov s0, r5
+; BE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT: vmov r0, s18
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: vmov.32 d11[1], r4
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT: vrev64.32 q0, q5
+; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11}
+; BE-I32-NEON-NEXT: pop {r4, r5, r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v4f16:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
+; BE-I64-NEON-NEXT: .vsave {d8, d9, d10}
+; BE-I64-NEON-NEXT: vpush {d8, d9, d10}
+; BE-I64-NEON-NEXT: vmov r0, s1
+; BE-I64-NEON-NEXT: vmov.f32 s16, s3
+; BE-I64-NEON-NEXT: vmov.f32 s18, s2
+; BE-I64-NEON-NEXT: vmov.f32 s20, s0
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: mov r5, r0
+; BE-I64-NEON-NEXT: vmov r0, s20
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: mov r7, r0
+; BE-I64-NEON-NEXT: vmov r0, s16
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov s0, r7
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: vmov r0, s18
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: vmov.32 d9[0], r5
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r6
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r7
+; BE-I64-NEON-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 d1, d9
+; BE-I64-NEON-NEXT: vrev64.32 d3, d8
+; BE-I64-NEON-NEXT: vrev64.32 d0, d10
+; BE-I64-NEON-NEXT: vrev64.32 d2, d16
+; BE-I64-NEON-NEXT: vpop {d8, d9, d10}
+; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
+ %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x)
+ ret <4 x iXLen> %a
+}
+declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>)
+
+define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
+; LE-I32-LABEL: lrint_v8f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT: vmov r0, s7
+; LE-I32-NEXT: vmov.f32 s18, s6
+; LE-I32-NEXT: vmov.f32 s16, s5
+; LE-I32-NEXT: vmov.f32 s20, s4
+; LE-I32-NEXT: vmov.f32 s22, s3
+; LE-I32-NEXT: vmov.f32 s24, s2
+; LE-I32-NEXT: vmov.f32 s26, s1
+; LE-I32-NEXT: vmov.f32 s28, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: mov r8, r0
+; LE-I32-NEXT: vmov r0, s26
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r9, r0
+; LE-I32-NEXT: vmov r0, s22
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r6, r0
+; LE-I32-NEXT: vmov r0, s28
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r7, r0
+; LE-I32-NEXT: vmov r0, s24
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: vmov r0, s18
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r5, r0
+; LE-I32-NEXT: vmov r0, s20
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r5
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r4
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r7
+; LE-I32-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r6
+; LE-I32-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r9
+; LE-I32-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEXT: vmov r0, s16
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: vmov.32 d11[1], r8
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vorr q0, q6, q6
+; LE-I32-NEXT: vorr q1, q5, q5
+; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; LE-I64-LABEL: lrint_v8f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: .pad #4
+; LE-I64-NEXT: sub sp, sp, #4
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: .pad #8
+; LE-I64-NEXT: sub sp, sp, #8
+; LE-I64-NEXT: vmov r0, s1
+; LE-I64-NEXT: vstr s6, [sp, #4] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s16, s7
+; LE-I64-NEXT: vmov.f32 s18, s5
+; LE-I64-NEXT: vmov.f32 s20, s4
+; LE-I64-NEXT: vmov.f32 s22, s3
+; LE-I64-NEXT: vmov.f32 s24, s2
+; LE-I64-NEXT: vmov.f32 s26, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r9, r0
+; LE-I64-NEXT: vmov r0, s26
+; LE-I64-NEXT: str r1, [sp] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r10, r0
+; LE-I64-NEXT: vmov r0, s22
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s24
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov r0, s18
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: vmov r0, s20
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r4, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r4
+; LE-I64-NEXT: mov r11, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r6
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r7
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r5
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r10
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d9[0], r9
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: ldr r0, [sp] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d15[1], r5
+; LE-I64-NEXT: vmov.32 d9[1], r0
+; LE-I64-NEXT: vmov.32 d13[1], r6
+; LE-I64-NEXT: vmov.32 d11[1], r11
+; LE-I64-NEXT: vmov.32 d8[1], r4
+; LE-I64-NEXT: vmov.32 d14[1], r7
+; LE-I64-NEXT: vorr q0, q4, q4
+; LE-I64-NEXT: vmov.32 d12[1], r8
+; LE-I64-NEXT: vorr q1, q7, q7
+; LE-I64-NEXT: vmov.32 d10[1], r1
+; LE-I64-NEXT: vorr q2, q6, q6
+; LE-I64-NEXT: vorr q3, q5, q5
+; LE-I64-NEXT: add sp, sp, #8
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: add sp, sp, #4
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v8f16:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; LE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEON-NEXT: vmov r0, s7
+; LE-I32-NEON-NEXT: vmov.f32 s18, s6
+; LE-I32-NEON-NEXT: vmov.f32 s16, s5
+; LE-I32-NEON-NEXT: vmov.f32 s20, s4
+; LE-I32-NEON-NEXT: vmov.f32 s22, s3
+; LE-I32-NEON-NEXT: vmov.f32 s24, s2
+; LE-I32-NEON-NEXT: vmov.f32 s26, s1
+; LE-I32-NEON-NEXT: vmov.f32 s28, s0
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: mov r8, r0
+; LE-I32-NEON-NEXT: vmov r0, s26
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: mov r9, r0
+; LE-I32-NEON-NEXT: vmov r0, s22
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: mov r6, r0
+; LE-I32-NEON-NEXT: vmov r0, s28
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: mov r7, r0
+; LE-I32-NEON-NEXT: vmov r0, s24
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: mov r4, r0
+; LE-I32-NEON-NEXT: vmov r0, s18
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: mov r5, r0
+; LE-I32-NEON-NEXT: vmov r0, s20
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov s0, r5
+; LE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov s0, r4
+; LE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov s0, r7
+; LE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov s0, r6
+; LE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov s0, r9
+; LE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT: vmov r0, s16
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: vmov.32 d11[1], r8
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT: vorr q0, q6, q6
+; LE-I32-NEON-NEXT: vorr q1, q5, q5
+; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v8f16:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: .pad #4
+; LE-I64-NEON-NEXT: sub sp, sp, #4
+; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: .pad #8
+; LE-I64-NEON-NEXT: sub sp, sp, #8
+; LE-I64-NEON-NEXT: vmov r0, s1
+; LE-I64-NEON-NEXT: vstr s6, [sp, #4] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vmov.f32 s16, s7
+; LE-I64-NEON-NEXT: vmov.f32 s18, s5
+; LE-I64-NEON-NEXT: vmov.f32 s20, s4
+; LE-I64-NEON-NEXT: vmov.f32 s22, s3
+; LE-I64-NEON-NEXT: vmov.f32 s24, s2
+; LE-I64-NEON-NEXT: vmov.f32 s26, s0
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: mov r9, r0
+; LE-I64-NEON-NEXT: vmov r0, s26
+; LE-I64-NEON-NEXT: str r1, [sp] @ 4-byte Spill
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: mov r10, r0
+; LE-I64-NEON-NEXT: vmov r0, s22
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: mov r5, r0
+; LE-I64-NEON-NEXT: vmov r0, s24
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: mov r7, r0
+; LE-I64-NEON-NEXT: vmov r0, s18
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: mov r6, r0
+; LE-I64-NEON-NEXT: vmov r0, s20
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: mov r4, r0
+; LE-I64-NEON-NEXT: vmov r0, s16
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r4
+; LE-I64-NEON-NEXT: mov r11, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r6
+; LE-I64-NEON-NEXT: mov r8, r1
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r7
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r5
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r10
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: vmov r0, s0
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r9
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r5
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r0
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r6
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r11
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r4
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r7
+; LE-I64-NEON-NEXT: vorr q0, q4, q4
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r8
+; LE-I64-NEON-NEXT: vorr q1, q7, q7
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r1
+; LE-I64-NEON-NEXT: vorr q2, q6, q6
+; LE-I64-NEON-NEXT: vorr q3, q5, q5
+; LE-I64-NEON-NEXT: add sp, sp, #8
+; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: add sp, sp, #4
+; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v8f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT: vmov r0, s1
+; BE-I32-NEXT: vmov.f32 s18, s7
+; BE-I32-NEXT: vmov.f32 s20, s6
+; BE-I32-NEXT: vmov.f32 s16, s5
+; BE-I32-NEXT: vmov.f32 s22, s4
+; BE-I32-NEXT: vmov.f32 s24, s3
+; BE-I32-NEXT: vmov.f32 s26, s2
+; BE-I32-NEXT: vmov.f32 s28, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: mov r8, r0
+; BE-I32-NEXT: vmov r0, s24
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r9, r0
+; BE-I32-NEXT: vmov r0, s18
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r6, r0
+; BE-I32-NEXT: vmov r0, s26
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r7, r0
+; BE-I32-NEXT: vmov r0, s20
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: vmov r0, s28
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: vmov r0, s22
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r5
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r4
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r7
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r6
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r9
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: vmov r0, s16
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: vmov.32 d12[1], r8
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q6
+; BE-I32-NEXT: vrev64.32 q1, q5
+; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; BE-I64-LABEL: lrint_v8f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: .pad #4
+; BE-I64-NEXT: sub sp, sp, #4
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT: .pad #8
+; BE-I64-NEXT: sub sp, sp, #8
+; BE-I64-NEXT: vmov r0, s1
+; BE-I64-NEXT: vmov.f32 s18, s7
+; BE-I64-NEXT: vmov.f32 s16, s6
+; BE-I64-NEXT: vmov.f32 s20, s5
+; BE-I64-NEXT: vmov.f32 s22, s4
+; BE-I64-NEXT: vmov.f32 s24, s3
+; BE-I64-NEXT: vmov.f32 s26, s2
+; BE-I64-NEXT: vmov.f32 s28, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r9, r0
+; BE-I64-NEXT: vmov r0, s28
+; BE-I64-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r10, r0
+; BE-I64-NEXT: vmov r0, s24
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r5, r0
+; BE-I64-NEXT: vmov r0, s26
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r7, r0
+; BE-I64-NEXT: vmov r0, s20
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r6, r0
+; BE-I64-NEXT: vmov r0, s22
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov r0, s18
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r4
+; BE-I64-NEXT: mov r11, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r6
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r7
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r5
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r10
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d8[0], r9
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; BE-I64-NEXT: vmov.32 d13[1], r5
+; BE-I64-NEXT: vmov.32 d8[1], r0
+; BE-I64-NEXT: vmov.32 d11[1], r6
+; BE-I64-NEXT: vmov.32 d9[1], r11
+; BE-I64-NEXT: vmov.32 d14[1], r4
+; BE-I64-NEXT: vmov.32 d12[1], r7
+; BE-I64-NEXT: vmov.32 d10[1], r8
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d8
+; BE-I64-NEXT: vrev64.32 d3, d13
+; BE-I64-NEXT: vrev64.32 d5, d11
+; BE-I64-NEXT: vrev64.32 d7, d9
+; BE-I64-NEXT: vrev64.32 d0, d14
+; BE-I64-NEXT: vrev64.32 d2, d12
+; BE-I64-NEXT: vrev64.32 d4, d10
+; BE-I64-NEXT: vrev64.32 d6, d16
+; BE-I64-NEXT: add sp, sp, #8
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT: add sp, sp, #4
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v8f16:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; BE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEON-NEXT: vmov r0, s1
+; BE-I32-NEON-NEXT: vmov.f32 s18, s7
+; BE-I32-NEON-NEXT: vmov.f32 s20, s6
+; BE-I32-NEON-NEXT: vmov.f32 s16, s5
+; BE-I32-NEON-NEXT: vmov.f32 s22, s4
+; BE-I32-NEON-NEXT: vmov.f32 s24, s3
+; BE-I32-NEON-NEXT: vmov.f32 s26, s2
+; BE-I32-NEON-NEXT: vmov.f32 s28, s0
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: mov r8, r0
+; BE-I32-NEON-NEXT: vmov r0, s24
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: mov r9, r0
+; BE-I32-NEON-NEXT: vmov r0, s18
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: mov r6, r0
+; BE-I32-NEON-NEXT: vmov r0, s26
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: mov r7, r0
+; BE-I32-NEON-NEXT: vmov r0, s20
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: mov r4, r0
+; BE-I32-NEON-NEXT: vmov r0, s28
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: mov r5, r0
+; BE-I32-NEON-NEXT: vmov r0, s22
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov s0, r5
+; BE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov s0, r4
+; BE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov s0, r7
+; BE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov s0, r6
+; BE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov s0, r9
+; BE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT: vmov r0, s16
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: vmov.32 d12[1], r8
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT: vrev64.32 q0, q6
+; BE-I32-NEON-NEXT: vrev64.32 q1, q5
+; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v8f16:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: .pad #4
+; BE-I64-NEON-NEXT: sub sp, sp, #4
+; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEON-NEXT: .pad #8
+; BE-I64-NEON-NEXT: sub sp, sp, #8
+; BE-I64-NEON-NEXT: vmov r0, s1
+; BE-I64-NEON-NEXT: vmov.f32 s18, s7
+; BE-I64-NEON-NEXT: vmov.f32 s16, s6
+; BE-I64-NEON-NEXT: vmov.f32 s20, s5
+; BE-I64-NEON-NEXT: vmov.f32 s22, s4
+; BE-I64-NEON-NEXT: vmov.f32 s24, s3
+; BE-I64-NEON-NEXT: vmov.f32 s26, s2
+; BE-I64-NEON-NEXT: vmov.f32 s28, s0
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: mov r9, r0
+; BE-I64-NEON-NEXT: vmov r0, s28
+; BE-I64-NEON-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: mov r10, r0
+; BE-I64-NEON-NEXT: vmov r0, s24
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: mov r5, r0
+; BE-I64-NEON-NEXT: vmov r0, s26
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: mov r7, r0
+; BE-I64-NEON-NEXT: vmov r0, s20
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: mov r6, r0
+; BE-I64-NEON-NEXT: vmov r0, s22
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: mov r4, r0
+; BE-I64-NEON-NEXT: vmov r0, s18
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov s0, r4
+; BE-I64-NEON-NEXT: mov r11, r1
+; BE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov s0, r6
+; BE-I64-NEON-NEXT: mov r8, r1
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov s0, r7
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov s0, r5
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov s0, r10
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: vmov r0, s16
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: vmov.32 d8[0], r9
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r5
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r0
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r6
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r11
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r4
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r7
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r8
+; BE-I64-NEON-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 d1, d8
+; BE-I64-NEON-NEXT: vrev64.32 d3, d13
+; BE-I64-NEON-NEXT: vrev64.32 d5, d11
+; BE-I64-NEON-NEXT: vrev64.32 d7, d9
+; BE-I64-NEON-NEXT: vrev64.32 d0, d14
+; BE-I64-NEON-NEXT: vrev64.32 d2, d12
+; BE-I64-NEON-NEXT: vrev64.32 d4, d10
+; BE-I64-NEON-NEXT: vrev64.32 d6, d16
+; BE-I64-NEON-NEXT: add sp, sp, #8
+; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEON-NEXT: add sp, sp, #4
+; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x)
+ ret <8 x iXLen> %a
+}
+declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>)
+
+define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
+; LE-I32-LABEL: lrint_v16f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: .pad #8
+; LE-I32-NEXT: sub sp, sp, #8
+; LE-I32-NEXT: vmov r0, s15
+; LE-I32-NEXT: vstr s13, [sp, #4] @ 4-byte Spill
+; LE-I32-NEXT: vmov.f32 s26, s14
+; LE-I32-NEXT: vstr s0, [sp] @ 4-byte Spill
+; LE-I32-NEXT: vmov.f32 s20, s12
+; LE-I32-NEXT: vmov.f32 s22, s11
+; LE-I32-NEXT: vmov.f32 s18, s10
+; LE-I32-NEXT: vmov.f32 s17, s9
+; LE-I32-NEXT: vmov.f32 s24, s8
+; LE-I32-NEXT: vmov.f32 s19, s7
+; LE-I32-NEXT: vmov.f32 s30, s6
+; LE-I32-NEXT: vmov.f32 s21, s5
+; LE-I32-NEXT: vmov.f32 s16, s4
+; LE-I32-NEXT: vmov.f32 s23, s3
+; LE-I32-NEXT: vmov.f32 s28, s2
+; LE-I32-NEXT: vmov.f32 s25, s1
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: mov r8, r0
+; LE-I32-NEXT: vmov r0, s17
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r9, r0
+; LE-I32-NEXT: vmov r0, s22
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r10, r0
+; LE-I32-NEXT: vmov r0, s21
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r7, r0
+; LE-I32-NEXT: vmov r0, s19
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: vmov r0, s25
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r5, r0
+; LE-I32-NEXT: vmov r0, s23
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r6, r0
+; LE-I32-NEXT: vmov r0, s20
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: vmov r0, s26
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: vmov r0, s24
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEXT: vmov r0, s18
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEXT: vmov r0, s16
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: vmov r0, s30
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: vmov r0, s28
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldr s0, [sp] @ 4-byte Reload
+; LE-I32-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r6
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r5
+; LE-I32-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r4
+; LE-I32-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r7
+; LE-I32-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r10
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r9
+; LE-I32-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; LE-I32-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: vmov.32 d11[1], r8
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vorr q0, q7, q7
+; LE-I32-NEXT: vorr q1, q4, q4
+; LE-I32-NEXT: vorr q2, q6, q6
+; LE-I32-NEXT: vorr q3, q5, q5
+; LE-I32-NEXT: add sp, sp, #8
+; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-I64-LABEL: lrint_v16f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: .pad #4
+; LE-I64-NEXT: sub sp, sp, #4
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: .pad #120
+; LE-I64-NEXT: sub sp, sp, #120
+; LE-I64-NEXT: mov r11, r0
+; LE-I64-NEXT: vmov r0, s7
+; LE-I64-NEXT: vstr s15, [sp, #24] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s23, s13
+; LE-I64-NEXT: vstr s14, [sp, #100] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s25, s12
+; LE-I64-NEXT: vmov.f32 s27, s11
+; LE-I64-NEXT: vstr s10, [sp, #104] @ 4-byte Spill
+; LE-I64-NEXT: vstr s9, [sp, #108] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s24, s8
+; LE-I64-NEXT: vmov.f32 s19, s6
+; LE-I64-NEXT: vmov.f32 s29, s5
+; LE-I64-NEXT: vmov.f32 s17, s4
+; LE-I64-NEXT: vmov.f32 s16, s3
+; LE-I64-NEXT: vmov.f32 s21, s2
+; LE-I64-NEXT: vmov.f32 s26, s1
+; LE-I64-NEXT: vmov.f32 s18, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov r0, s25
+; LE-I64-NEXT: str r1, [sp, #56] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s27
+; LE-I64-NEXT: str r1, [sp, #116] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: vmov r0, s29
+; LE-I64-NEXT: str r1, [sp, #112] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: vmov r0, s23
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: add lr, sp, #80
+; LE-I64-NEXT: vmov.32 d17[0], r6
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: vmov r0, s17
+; LE-I64-NEXT: vmov r8, s21
+; LE-I64-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; LE-I64-NEXT: vmov r10, s19
+; LE-I64-NEXT: vmov.32 d10[0], r5
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: vmov.32 d11[0], r6
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: mov r0, r10
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d11[0], r7
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: mov r0, r8
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d11[1], r0
+; LE-I64-NEXT: vmov r0, s18
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: vmov.32 d10[1], r7
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d15[1], r4
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: vmov r0, s26
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: vmov r8, s24
+; LE-I64-NEXT: vmov.32 d14[1], r9
+; LE-I64-NEXT: mov r10, r1
+; LE-I64-NEXT: vmov s24, r5
+; LE-I64-NEXT: vldr s0, [sp, #24] @ 4-byte Reload
+; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT: vmov r7, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s24
+; LE-I64-NEXT: vmov s22, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s22
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: vmov s24, r6
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: mov r0, r7
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s24
+; LE-I64-NEXT: vmov s22, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s22
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: mov r0, r8
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vldr s0, [sp, #100] @ 4-byte Reload
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov.32 d14[1], r5
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; LE-I64-NEXT: vmov s20, r0
+; LE-I64-NEXT: vmov.32 d13[1], r6
+; LE-I64-NEXT: vmov r4, s0
+; LE-I64-NEXT: vldr s0, [sp, #108] @ 4-byte Reload
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s20
+; LE-I64-NEXT: vmov s16, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s16
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: vmov s18, r7
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: mov r0, r4
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s18
+; LE-I64-NEXT: vmov s16, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s16
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d11[1], r6
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #80
+; LE-I64-NEXT: vmov.32 d10[1], r4
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: vmov.32 d16[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; LE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: vmov.32 d19[1], r0
+; LE-I64-NEXT: ldr r0, [sp, #116] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d21[1], r10
+; LE-I64-NEXT: vmov.32 d18[1], r0
+; LE-I64-NEXT: ldr r0, [sp, #112] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d12[1], r5
+; LE-I64-NEXT: vmov.32 d17[1], r0
+; LE-I64-NEXT: add r0, r11, #64
+; LE-I64-NEXT: vmov.32 d16[1], r1
+; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEXT: vmov.32 d20[1], r9
+; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]
+; LE-I64-NEXT: vst1.64 {d14, d15}, [r11:128]!
+; LE-I64-NEXT: vst1.64 {d20, d21}, [r11:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]
+; LE-I64-NEXT: add sp, sp, #120
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: add sp, sp, #4
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v16f16:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: .pad #8
+; LE-I32-NEON-NEXT: sub sp, sp, #8
+; LE-I32-NEON-NEXT: vmov r0, s15
+; LE-I32-NEON-NEXT: vstr s13, [sp, #4] @ 4-byte Spill
+; LE-I32-NEON-NEXT: vmov.f32 s26, s14
+; LE-I32-NEON-NEXT: vstr s0, [sp] @ 4-byte Spill
+; LE-I32-NEON-NEXT: vmov.f32 s20, s12
+; LE-I32-NEON-NEXT: vmov.f32 s22, s11
+; LE-I32-NEON-NEXT: vmov.f32 s18, s10
+; LE-I32-NEON-NEXT: vmov.f32 s17, s9
+; LE-I32-NEON-NEXT: vmov.f32 s24, s8
+; LE-I32-NEON-NEXT: vmov.f32 s19, s7
+; LE-I32-NEON-NEXT: vmov.f32 s30, s6
+; LE-I32-NEON-NEXT: vmov.f32 s21, s5
+; LE-I32-NEON-NEXT: vmov.f32 s16, s4
+; LE-I32-NEON-NEXT: vmov.f32 s23, s3
+; LE-I32-NEON-NEXT: vmov.f32 s28, s2
+; LE-I32-NEON-NEXT: vmov.f32 s25, s1
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: mov r8, r0
+; LE-I32-NEON-NEXT: vmov r0, s17
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: mov r9, r0
+; LE-I32-NEON-NEXT: vmov r0, s22
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: mov r10, r0
+; LE-I32-NEON-NEXT: vmov r0, s21
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: mov r7, r0
+; LE-I32-NEON-NEXT: vmov r0, s19
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: mov r4, r0
+; LE-I32-NEON-NEXT: vmov r0, s25
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: mov r5, r0
+; LE-I32-NEON-NEXT: vmov r0, s23
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: mov r6, r0
+; LE-I32-NEON-NEXT: vmov r0, s20
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT: vmov r0, s26
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT: vmov r0, s24
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEON-NEXT: vmov r0, s18
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEON-NEXT: vmov r0, s16
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT: vmov r0, s30
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT: vmov r0, s28
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vldr s0, [sp] @ 4-byte Reload
+; LE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT: vmov r0, s0
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov s0, r6
+; LE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov s0, r5
+; LE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov s0, r4
+; LE-I32-NEON-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov s0, r7
+; LE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov s0, r10
+; LE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov s0, r9
+; LE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; LE-I32-NEON-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT: vmov r0, s0
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: vmov.32 d11[1], r8
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT: vorr q0, q7, q7
+; LE-I32-NEON-NEXT: vorr q1, q4, q4
+; LE-I32-NEON-NEXT: vorr q2, q6, q6
+; LE-I32-NEON-NEXT: vorr q3, q5, q5
+; LE-I32-NEON-NEXT: add sp, sp, #8
+; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v16f16:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: .pad #4
+; LE-I64-NEON-NEXT: sub sp, sp, #4
+; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: .pad #120
+; LE-I64-NEON-NEXT: sub sp, sp, #120
+; LE-I64-NEON-NEXT: mov r11, r0
+; LE-I64-NEON-NEXT: vmov r0, s7
+; LE-I64-NEON-NEXT: vstr s15, [sp, #24] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vmov.f32 s23, s13
+; LE-I64-NEON-NEXT: vstr s14, [sp, #100] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vmov.f32 s25, s12
+; LE-I64-NEON-NEXT: vmov.f32 s27, s11
+; LE-I64-NEON-NEXT: vstr s10, [sp, #104] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vstr s9, [sp, #108] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vmov.f32 s24, s8
+; LE-I64-NEON-NEXT: vmov.f32 s19, s6
+; LE-I64-NEON-NEXT: vmov.f32 s29, s5
+; LE-I64-NEON-NEXT: vmov.f32 s17, s4
+; LE-I64-NEON-NEXT: vmov.f32 s16, s3
+; LE-I64-NEON-NEXT: vmov.f32 s21, s2
+; LE-I64-NEON-NEXT: vmov.f32 s26, s1
+; LE-I64-NEON-NEXT: vmov.f32 s18, s0
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: mov r7, r0
+; LE-I64-NEON-NEXT: vmov r0, s25
+; LE-I64-NEON-NEXT: str r1, [sp, #56] @ 4-byte Spill
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: mov r5, r0
+; LE-I64-NEON-NEXT: vmov r0, s27
+; LE-I64-NEON-NEXT: str r1, [sp, #116] @ 4-byte Spill
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: mov r6, r0
+; LE-I64-NEON-NEXT: vmov r0, s29
+; LE-I64-NEON-NEXT: str r1, [sp, #112] @ 4-byte Spill
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: vmov r0, s23
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: add lr, sp, #80
+; LE-I64-NEON-NEXT: vmov.32 d17[0], r6
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: mov r6, r0
+; LE-I64-NEON-NEXT: vmov r0, s17
+; LE-I64-NEON-NEXT: vmov r8, s21
+; LE-I64-NEON-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vmov r10, s19
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r5
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: add lr, sp, #40
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r6
+; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: mov r0, r10
+; LE-I64-NEON-NEXT: mov r9, r1
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r7
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: mov r0, r8
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: mov r6, r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r0
+; LE-I64-NEON-NEXT: vmov r0, s18
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: mov r5, r0
+; LE-I64-NEON-NEXT: vmov r0, s16
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r7
+; LE-I64-NEON-NEXT: add lr, sp, #56
+; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r4
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: vmov r0, s26
+; LE-I64-NEON-NEXT: add lr, sp, #24
+; LE-I64-NEON-NEXT: vmov r8, s24
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r9
+; LE-I64-NEON-NEXT: mov r10, r1
+; LE-I64-NEON-NEXT: vmov s24, r5
+; LE-I64-NEON-NEXT: vldr s0, [sp, #24] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT: vmov r7, s0
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov.f32 s0, s24
+; LE-I64-NEON-NEXT: vmov s22, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s22
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: vmov s24, r6
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: mov r0, r7
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov.f32 s0, s24
+; LE-I64-NEON-NEXT: vmov s22, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s22
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #8
+; LE-I64-NEON-NEXT: mov r9, r1
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: mov r0, r8
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vldr s0, [sp, #100] @ 4-byte Reload
+; LE-I64-NEON-NEXT: mov r7, r0
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r5
+; LE-I64-NEON-NEXT: vmov r0, s0
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov s20, r0
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r6
+; LE-I64-NEON-NEXT: vmov r4, s0
+; LE-I64-NEON-NEXT: vldr s0, [sp, #108] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov r0, s0
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov.f32 s0, s20
+; LE-I64-NEON-NEXT: vmov s16, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s16
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: vmov s18, r7
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: mov r0, r4
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov.f32 s0, s18
+; LE-I64-NEON-NEXT: vmov s16, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s16
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r6
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: add lr, sp, #80
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r4
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #40
+; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #8
+; LE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #24
+; LE-I64-NEON-NEXT: vmov.32 d19[1], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #116] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d21[1], r10
+; LE-I64-NEON-NEXT: vmov.32 d18[1], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #112] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r5
+; LE-I64-NEON-NEXT: vmov.32 d17[1], r0
+; LE-I64-NEON-NEXT: add r0, r11, #64
+; LE-I64-NEON-NEXT: vmov.32 d16[1], r1
+; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEON-NEXT: vmov.32 d20[1], r9
+; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]
+; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r11:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r11:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #56
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]
+; LE-I64-NEON-NEXT: add sp, sp, #120
+; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: add sp, sp, #4
+; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v16f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: .pad #16
+; BE-I32-NEXT: sub sp, sp, #16
+; BE-I32-NEXT: vmov r0, s1
+; BE-I32-NEXT: vstr s14, [sp, #4] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s30, s15
+; BE-I32-NEXT: vstr s13, [sp, #12] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s17, s12
+; BE-I32-NEXT: vstr s10, [sp, #8] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s19, s11
+; BE-I32-NEXT: vstr s8, [sp] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s21, s9
+; BE-I32-NEXT: vmov.f32 s23, s7
+; BE-I32-NEXT: vmov.f32 s24, s6
+; BE-I32-NEXT: vmov.f32 s25, s5
+; BE-I32-NEXT: vmov.f32 s26, s4
+; BE-I32-NEXT: vmov.f32 s27, s3
+; BE-I32-NEXT: vmov.f32 s28, s2
+; BE-I32-NEXT: vmov.f32 s29, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: mov r8, r0
+; BE-I32-NEXT: vmov r0, s27
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r9, r0
+; BE-I32-NEXT: vmov r0, s25
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r10, r0
+; BE-I32-NEXT: vmov r0, s23
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r7, r0
+; BE-I32-NEXT: vmov r0, s21
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: vmov r0, s19
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: vmov r0, s30
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r6, r0
+; BE-I32-NEXT: vmov r0, s17
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: vmov r0, s29
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: vmov r0, s28
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: vmov r0, s26
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d14[0], r0
+; BE-I32-NEXT: vmov r0, s24
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #8] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r6
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r5
+; BE-I32-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r4
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r7
+; BE-I32-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r10
+; BE-I32-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r9
+; BE-I32-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #12] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: vmov.32 d10[1], r8
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q5
+; BE-I32-NEXT: vrev64.32 q1, q7
+; BE-I32-NEXT: vrev64.32 q2, q6
+; BE-I32-NEXT: vrev64.32 q3, q4
+; BE-I32-NEXT: add sp, sp, #16
+; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I64-LABEL: lrint_v16f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: .pad #4
+; BE-I64-NEXT: sub sp, sp, #4
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: .pad #112
+; BE-I64-NEXT: sub sp, sp, #112
+; BE-I64-NEXT: mov r11, r0
+; BE-I64-NEXT: vmov r0, s14
+; BE-I64-NEXT: vmov.f32 s17, s15
+; BE-I64-NEXT: vstr s13, [sp, #52] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s21, s12
+; BE-I64-NEXT: vstr s10, [sp, #68] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s23, s11
+; BE-I64-NEXT: vstr s7, [sp, #72] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s19, s9
+; BE-I64-NEXT: vstr s4, [sp, #28] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s26, s8
+; BE-I64-NEXT: vmov.f32 s24, s6
+; BE-I64-NEXT: vmov.f32 s18, s5
+; BE-I64-NEXT: vmov.f32 s25, s3
+; BE-I64-NEXT: vmov.f32 s16, s2
+; BE-I64-NEXT: vmov.f32 s27, s1
+; BE-I64-NEXT: vmov.f32 s29, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r8, r0
+; BE-I64-NEXT: vmov r0, s29
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r9, r0
+; BE-I64-NEXT: vmov r0, s27
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r7, r0
+; BE-I64-NEXT: vmov r0, s21
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r6, r0
+; BE-I64-NEXT: vmov r0, s25
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r5, r0
+; BE-I64-NEXT: vmov r0, s23
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s0, r5
+; BE-I64-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; BE-I64-NEXT: vstr d16, [sp, #96] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s0, r6
+; BE-I64-NEXT: str r1, [sp, #92] @ 4-byte Spill
+; BE-I64-NEXT: vstr d16, [sp, #80] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s0, r7
+; BE-I64-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; BE-I64-NEXT: vstr d16, [sp, #56] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r9
+; BE-I64-NEXT: mov r10, r1
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: vmov r0, s17
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d10[0], r8
+; BE-I64-NEXT: vmov r6, s19
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: mov r0, r6
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r6, r0
+; BE-I64-NEXT: vmov r0, s18
+; BE-I64-NEXT: vmov.32 d10[1], r4
+; BE-I64-NEXT: vstr d10, [sp, #40] @ 8-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: vmov.32 d11[1], r7
+; BE-I64-NEXT: vstr d11, [sp, #32] @ 8-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.32 d15[1], r5
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vstr d15, [sp, #16] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vldr s0, [sp, #28] @ 4-byte Reload
+; BE-I64-NEXT: vmov r5, s26
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s26, r4
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vmov.32 d14[1], r10
+; BE-I64-NEXT: vmov r4, s24
+; BE-I64-NEXT: vstr d16, [sp] @ 8-byte Spill
+; BE-I64-NEXT: vstr d14, [sp, #8] @ 8-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s26
+; BE-I64-NEXT: vmov s22, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s22
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: vmov s24, r6
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: mov r0, r4
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s24
+; BE-I64-NEXT: vmov s22, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s22
+; BE-I64-NEXT: mov r9, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: vmov.32 d14[1], r6
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: mov r0, r5
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vldr s0, [sp, #52] @ 4-byte Reload
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov.32 d13[1], r7
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vldr s0, [sp, #68] @ 4-byte Reload
+; BE-I64-NEXT: vmov s20, r0
+; BE-I64-NEXT: vmov.32 d11[1], r6
+; BE-I64-NEXT: vmov r7, s0
+; BE-I64-NEXT: vldr s0, [sp, #72] @ 4-byte Reload
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s20
+; BE-I64-NEXT: vmov s16, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: vmov s18, r4
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: mov r0, r7
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s18
+; BE-I64-NEXT: vmov s16, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: vmov.32 d15[1], r4
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d24[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; BE-I64-NEXT: vldr d23, [sp, #56] @ 8-byte Reload
+; BE-I64-NEXT: vldr d20, [sp, #8] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d23[1], r0
+; BE-I64-NEXT: ldr r0, [sp, #92] @ 4-byte Reload
+; BE-I64-NEXT: vldr d22, [sp, #80] @ 8-byte Reload
+; BE-I64-NEXT: vldr d26, [sp, #16] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d21, d20
+; BE-I64-NEXT: vmov.32 d22[1], r0
+; BE-I64-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; BE-I64-NEXT: vldr d30, [sp] @ 8-byte Reload
+; BE-I64-NEXT: vldr d25, [sp, #96] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d20, d26
+; BE-I64-NEXT: vldr d26, [sp, #32] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d10[1], r5
+; BE-I64-NEXT: vmov.32 d12[1], r9
+; BE-I64-NEXT: vldr d28, [sp, #40] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d27, d26
+; BE-I64-NEXT: vmov.32 d25[1], r0
+; BE-I64-NEXT: add r0, r11, #64
+; BE-I64-NEXT: vmov.32 d30[1], r8
+; BE-I64-NEXT: vmov.32 d9[1], r6
+; BE-I64-NEXT: vrev64.32 d26, d28
+; BE-I64-NEXT: vrev64.32 d29, d10
+; BE-I64-NEXT: vmov.32 d24[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d12
+; BE-I64-NEXT: vrev64.32 d28, d23
+; BE-I64-NEXT: vrev64.32 d23, d22
+; BE-I64-NEXT: vrev64.32 d22, d30
+; BE-I64-NEXT: vrev64.32 d31, d25
+; BE-I64-NEXT: vrev64.32 d0, d9
+; BE-I64-NEXT: vrev64.32 d30, d24
+; BE-I64-NEXT: vst1.64 {d0, d1}, [r0:128]!
+; BE-I64-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEXT: vst1.64 {d28, d29}, [r0:128]!
+; BE-I64-NEXT: vrev64.32 d19, d13
+; BE-I64-NEXT: vst1.64 {d26, d27}, [r0:128]
+; BE-I64-NEXT: vst1.64 {d20, d21}, [r11:128]!
+; BE-I64-NEXT: vrev64.32 d18, d14
+; BE-I64-NEXT: vst1.64 {d22, d23}, [r11:128]!
+; BE-I64-NEXT: vrev64.32 d17, d15
+; BE-I64-NEXT: vrev64.32 d16, d11
+; BE-I64-NEXT: vst1.64 {d18, d19}, [r11:128]!
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]
+; BE-I64-NEXT: add sp, sp, #112
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: add sp, sp, #4
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v16f16:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: .pad #16
+; BE-I32-NEON-NEXT: sub sp, sp, #16
+; BE-I32-NEON-NEXT: vmov r0, s1
+; BE-I32-NEON-NEXT: vstr s14, [sp, #4] @ 4-byte Spill
+; BE-I32-NEON-NEXT: vmov.f32 s30, s15
+; BE-I32-NEON-NEXT: vstr s13, [sp, #12] @ 4-byte Spill
+; BE-I32-NEON-NEXT: vmov.f32 s17, s12
+; BE-I32-NEON-NEXT: vstr s10, [sp, #8] @ 4-byte Spill
+; BE-I32-NEON-NEXT: vmov.f32 s19, s11
+; BE-I32-NEON-NEXT: vstr s8, [sp] @ 4-byte Spill
+; BE-I32-NEON-NEXT: vmov.f32 s21, s9
+; BE-I32-NEON-NEXT: vmov.f32 s23, s7
+; BE-I32-NEON-NEXT: vmov.f32 s24, s6
+; BE-I32-NEON-NEXT: vmov.f32 s25, s5
+; BE-I32-NEON-NEXT: vmov.f32 s26, s4
+; BE-I32-NEON-NEXT: vmov.f32 s27, s3
+; BE-I32-NEON-NEXT: vmov.f32 s28, s2
+; BE-I32-NEON-NEXT: vmov.f32 s29, s0
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: mov r8, r0
+; BE-I32-NEON-NEXT: vmov r0, s27
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: mov r9, r0
+; BE-I32-NEON-NEXT: vmov r0, s25
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: mov r10, r0
+; BE-I32-NEON-NEXT: vmov r0, s23
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: mov r7, r0
+; BE-I32-NEON-NEXT: vmov r0, s21
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: mov r4, r0
+; BE-I32-NEON-NEXT: vmov r0, s19
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: mov r5, r0
+; BE-I32-NEON-NEXT: vmov r0, s30
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: mov r6, r0
+; BE-I32-NEON-NEXT: vmov r0, s17
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT: vmov r0, s29
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT: vmov r0, s28
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT: vmov r0, s26
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I32-NEON-NEXT: vmov r0, s24
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vldr s0, [sp] @ 4-byte Reload
+; BE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT: vmov r0, s0
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; BE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT: vmov r0, s0
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vldr s0, [sp, #8] @ 4-byte Reload
+; BE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT: vmov r0, s0
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov s0, r6
+; BE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov s0, r5
+; BE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov s0, r4
+; BE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov s0, r7
+; BE-I32-NEON-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov s0, r10
+; BE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov s0, r9
+; BE-I32-NEON-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vldr s0, [sp, #12] @ 4-byte Reload
+; BE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT: vmov r0, s0
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: vmov.32 d10[1], r8
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT: vrev64.32 q0, q5
+; BE-I32-NEON-NEXT: vrev64.32 q1, q7
+; BE-I32-NEON-NEXT: vrev64.32 q2, q6
+; BE-I32-NEON-NEXT: vrev64.32 q3, q4
+; BE-I32-NEON-NEXT: add sp, sp, #16
+; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v16f16:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: .pad #4
+; BE-I64-NEON-NEXT: sub sp, sp, #4
+; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: .pad #112
+; BE-I64-NEON-NEXT: sub sp, sp, #112
+; BE-I64-NEON-NEXT: mov r11, r0
+; BE-I64-NEON-NEXT: vmov r0, s14
+; BE-I64-NEON-NEXT: vmov.f32 s17, s15
+; BE-I64-NEON-NEXT: vstr s13, [sp, #52] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vmov.f32 s21, s12
+; BE-I64-NEON-NEXT: vstr s10, [sp, #68] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vmov.f32 s23, s11
+; BE-I64-NEON-NEXT: vstr s7, [sp, #72] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vmov.f32 s19, s9
+; BE-I64-NEON-NEXT: vstr s4, [sp, #28] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vmov.f32 s26, s8
+; BE-I64-NEON-NEXT: vmov.f32 s24, s6
+; BE-I64-NEON-NEXT: vmov.f32 s18, s5
+; BE-I64-NEON-NEXT: vmov.f32 s25, s3
+; BE-I64-NEON-NEXT: vmov.f32 s16, s2
+; BE-I64-NEON-NEXT: vmov.f32 s27, s1
+; BE-I64-NEON-NEXT: vmov.f32 s29, s0
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: mov r8, r0
+; BE-I64-NEON-NEXT: vmov r0, s29
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: mov r9, r0
+; BE-I64-NEON-NEXT: vmov r0, s27
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: mov r7, r0
+; BE-I64-NEON-NEXT: vmov r0, s21
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: mov r6, r0
+; BE-I64-NEON-NEXT: vmov r0, s25
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: mov r5, r0
+; BE-I64-NEON-NEXT: vmov r0, s23
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: vmov s0, r5
+; BE-I64-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstr d16, [sp, #96] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: vmov s0, r6
+; BE-I64-NEON-NEXT: str r1, [sp, #92] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstr d16, [sp, #80] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: vmov s0, r7
+; BE-I64-NEON-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstr d16, [sp, #56] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov s0, r9
+; BE-I64-NEON-NEXT: mov r10, r1
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: vmov r0, s17
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r8
+; BE-I64-NEON-NEXT: vmov r6, s19
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: mov r0, r6
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: mov r6, r0
+; BE-I64-NEON-NEXT: vmov r0, s18
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r4
+; BE-I64-NEON-NEXT: vstr d10, [sp, #40] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: mov r4, r0
+; BE-I64-NEON-NEXT: vmov r0, s16
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r7
+; BE-I64-NEON-NEXT: vstr d11, [sp, #32] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r5
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: vstr d15, [sp, #16] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vldr s0, [sp, #28] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vmov r5, s26
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: vmov s26, r4
+; BE-I64-NEON-NEXT: vmov r0, s0
+; BE-I64-NEON-NEXT: mov r8, r1
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r10
+; BE-I64-NEON-NEXT: vmov r4, s24
+; BE-I64-NEON-NEXT: vstr d16, [sp] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d14, [sp, #8] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov.f32 s0, s26
+; BE-I64-NEON-NEXT: vmov s22, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s22
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: vmov s24, r6
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: mov r0, r4
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov.f32 s0, s24
+; BE-I64-NEON-NEXT: vmov s22, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s22
+; BE-I64-NEON-NEXT: mov r9, r1
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r6
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: mov r0, r5
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vldr s0, [sp, #52] @ 4-byte Reload
+; BE-I64-NEON-NEXT: mov r4, r0
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r7
+; BE-I64-NEON-NEXT: vmov r0, s0
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vldr s0, [sp, #68] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vmov s20, r0
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r6
+; BE-I64-NEON-NEXT: vmov r7, s0
+; BE-I64-NEON-NEXT: vldr s0, [sp, #72] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vmov r0, s0
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov.f32 s0, s20
+; BE-I64-NEON-NEXT: vmov s16, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: vmov s18, r4
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: mov r0, r7
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov.f32 s0, s18
+; BE-I64-NEON-NEXT: vmov s16, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r4
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d24[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vldr d23, [sp, #56] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vldr d20, [sp, #8] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d23[1], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #92] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vldr d22, [sp, #80] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vldr d26, [sp, #16] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d21, d20
+; BE-I64-NEON-NEXT: vmov.32 d22[1], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vldr d30, [sp] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vldr d25, [sp, #96] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d20, d26
+; BE-I64-NEON-NEXT: vldr d26, [sp, #32] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r5
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r9
+; BE-I64-NEON-NEXT: vldr d28, [sp, #40] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d27, d26
+; BE-I64-NEON-NEXT: vmov.32 d25[1], r0
+; BE-I64-NEON-NEXT: add r0, r11, #64
+; BE-I64-NEON-NEXT: vmov.32 d30[1], r8
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r6
+; BE-I64-NEON-NEXT: vrev64.32 d26, d28
+; BE-I64-NEON-NEXT: vrev64.32 d29, d10
+; BE-I64-NEON-NEXT: vmov.32 d24[1], r1
+; BE-I64-NEON-NEXT: vrev64.32 d1, d12
+; BE-I64-NEON-NEXT: vrev64.32 d28, d23
+; BE-I64-NEON-NEXT: vrev64.32 d23, d22
+; BE-I64-NEON-NEXT: vrev64.32 d22, d30
+; BE-I64-NEON-NEXT: vrev64.32 d31, d25
+; BE-I64-NEON-NEXT: vrev64.32 d0, d9
+; BE-I64-NEON-NEXT: vrev64.32 d30, d24
+; BE-I64-NEON-NEXT: vst1.64 {d0, d1}, [r0:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d28, d29}, [r0:128]!
+; BE-I64-NEON-NEXT: vrev64.32 d19, d13
+; BE-I64-NEON-NEXT: vst1.64 {d26, d27}, [r0:128]
+; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r11:128]!
+; BE-I64-NEON-NEXT: vrev64.32 d18, d14
+; BE-I64-NEON-NEXT: vst1.64 {d22, d23}, [r11:128]!
+; BE-I64-NEON-NEXT: vrev64.32 d17, d15
+; BE-I64-NEON-NEXT: vrev64.32 d16, d11
+; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r11:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]
+; BE-I64-NEON-NEXT: add sp, sp, #112
+; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: add sp, sp, #4
+; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x)
+ ret <16 x iXLen> %a
+}
+declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>)
+
+define <32 x iXLen> @lrint_v32f16(<32 x half> %x) {
+; LE-I32-LABEL: lrint_v32f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEXT: .pad #4
+; LE-I32-NEXT: sub sp, sp, #4
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: .pad #112
+; LE-I32-NEXT: sub sp, sp, #112
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: vmov r0, s3
+; LE-I32-NEXT: vmov.f32 s19, s15
+; LE-I32-NEXT: vstr s14, [sp, #100] @ 4-byte Spill
+; LE-I32-NEXT: vmov.f32 s21, s13
+; LE-I32-NEXT: vstr s12, [sp, #104] @ 4-byte Spill
+; LE-I32-NEXT: vmov.f32 s23, s11
+; LE-I32-NEXT: vstr s10, [sp, #96] @ 4-byte Spill
+; LE-I32-NEXT: vmov.f32 s25, s9
+; LE-I32-NEXT: vstr s8, [sp, #92] @ 4-byte Spill
+; LE-I32-NEXT: vmov.f32 s27, s7
+; LE-I32-NEXT: vstr s6, [sp, #88] @ 4-byte Spill
+; LE-I32-NEXT: vmov.f32 s29, s5
+; LE-I32-NEXT: vstr s4, [sp, #84] @ 4-byte Spill
+; LE-I32-NEXT: vstr s2, [sp, #80] @ 4-byte Spill
+; LE-I32-NEXT: vstr s1, [sp, #108] @ 4-byte Spill
+; LE-I32-NEXT: vstr s0, [sp, #60] @ 4-byte Spill
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: mov r8, r0
+; LE-I32-NEXT: vmov r0, s29
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r9, r0
+; LE-I32-NEXT: vmov r0, s27
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r10, r0
+; LE-I32-NEXT: vmov r0, s25
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r11, r0
+; LE-I32-NEXT: vmov r0, s23
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r6, r0
+; LE-I32-NEXT: vmov r0, s21
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r7, r0
+; LE-I32-NEXT: vmov r0, s19
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r5, r0
+; LE-I32-NEXT: ldrh r0, [sp, #216]
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: ldrh r0, [sp, #224]
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: ldrh r0, [sp, #232]
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: ldrh r0, [sp, #240]
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: ldrh r0, [sp, #248]
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: add lr, sp, #256
+; LE-I32-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEXT: ldrh r0, [lr]
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: add lr, sp, #256
+; LE-I32-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEXT: ldrh r0, [lr, #8]
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: add lr, sp, #256
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: ldrh r0, [lr, #16]
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: add lr, sp, #256
+; LE-I32-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEXT: ldrh r0, [lr, #20]
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: add lr, sp, #256
+; LE-I32-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEXT: ldrh r0, [lr, #12]
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEXT: add lr, sp, #64
+; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT: add lr, sp, #256
+; LE-I32-NEXT: ldrh r0, [lr, #4]
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEXT: ldrh r0, [sp, #252]
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEXT: ldrh r0, [sp, #244]
+; LE-I32-NEXT: add lr, sp, #40
+; LE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEXT: ldrh r0, [sp, #236]
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: ldrh r0, [sp, #228]
+; LE-I32-NEXT: add lr, sp, #24
+; LE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEXT: ldrh r0, [sp, #220]
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldr s0, [sp, #60] @ 4-byte Reload
+; LE-I32-NEXT: add lr, sp, #8
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldr s0, [sp, #80] @ 4-byte Reload
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldr s0, [sp, #84] @ 4-byte Reload
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldr s0, [sp, #88] @ 4-byte Reload
+; LE-I32-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldr s0, [sp, #92] @ 4-byte Reload
+; LE-I32-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldr s0, [sp, #96] @ 4-byte Reload
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldr s0, [sp, #100] @ 4-byte Reload
+; LE-I32-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r5
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r7
+; LE-I32-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r6
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r11
+; LE-I32-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r10
+; LE-I32-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r9
+; LE-I32-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldr s0, [sp, #108] @ 4-byte Reload
+; LE-I32-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: vmov.32 d11[1], r8
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: add lr, sp, #8
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: add r0, r4, #64
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: add lr, sp, #24
+; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: add lr, sp, #40
+; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: add lr, sp, #64
+; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I32-NEXT: vst1.32 {d10, d11}, [r4:128]!
+; LE-I32-NEXT: vst1.32 {d12, d13}, [r4:128]!
+; LE-I32-NEXT: vst1.32 {d14, d15}, [r4:128]!
+; LE-I32-NEXT: vst1.64 {d8, d9}, [r4:128]
+; LE-I32-NEXT: add sp, sp, #112
+; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: add sp, sp, #4
+; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I64-LABEL: lrint_v32f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: .pad #4
+; LE-I64-NEXT: sub sp, sp, #4
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: .pad #248
+; LE-I64-NEXT: sub sp, sp, #248
+; LE-I64-NEXT: add lr, sp, #256
+; LE-I64-NEXT: mov r11, r0
+; LE-I64-NEXT: vstr s15, [sp, #176] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s19, s14
+; LE-I64-NEXT: ldrh r0, [lr, #132]
+; LE-I64-NEXT: vmov.f32 s17, s11
+; LE-I64-NEXT: vstr s13, [sp, #196] @ 4-byte Spill
+; LE-I64-NEXT: vstr s12, [sp, #112] @ 4-byte Spill
+; LE-I64-NEXT: vstr s10, [sp, #136] @ 4-byte Spill
+; LE-I64-NEXT: vstr s9, [sp, #160] @ 4-byte Spill
+; LE-I64-NEXT: vstr s8, [sp, #200] @ 4-byte Spill
+; LE-I64-NEXT: vstr s7, [sp, #100] @ 4-byte Spill
+; LE-I64-NEXT: vstr s6, [sp, #116] @ 4-byte Spill
+; LE-I64-NEXT: vstr s5, [sp, #76] @ 4-byte Spill
+; LE-I64-NEXT: vstr s4, [sp, #120] @ 4-byte Spill
+; LE-I64-NEXT: vstr s3, [sp, #156] @ 4-byte Spill
+; LE-I64-NEXT: vstr s2, [sp, #192] @ 4-byte Spill
+; LE-I64-NEXT: vstr s1, [sp, #104] @ 4-byte Spill
+; LE-I64-NEXT: vstr s0, [sp, #108] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #256
+; LE-I64-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; LE-I64-NEXT: str r1, [sp, #56] @ 4-byte Spill
+; LE-I64-NEXT: ldrh r0, [lr, #108]
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: add lr, sp, #256
+; LE-I64-NEXT: str r0, [sp, #32] @ 4-byte Spill
+; LE-I64-NEXT: ldrh r0, [lr, #96]
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: add lr, sp, #256
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: ldrh r0, [lr, #100]
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: add lr, sp, #256
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: ldrh r0, [lr, #156]
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: add lr, sp, #256
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: ldrh r0, [lr, #152]
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: add lr, sp, #256
+; LE-I64-NEXT: mov r4, r0
+; LE-I64-NEXT: ldrh r0, [lr, #148]
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r4
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r6
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r7
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r5
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #256
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: ldrh r0, [lr, #144]
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: add lr, sp, #256
+; LE-I64-NEXT: mov r10, r0
+; LE-I64-NEXT: vmov.32 d11[1], r7
+; LE-I64-NEXT: ldrh r0, [lr, #104]
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.32 d10[1], r5
+; LE-I64-NEXT: add lr, sp, #80
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #256
+; LE-I64-NEXT: ldrh r0, [lr, #124]
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: add lr, sp, #256
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEXT: ldrh r0, [lr, #120]
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.32 d14[1], r4
+; LE-I64-NEXT: add lr, sp, #16
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #256
+; LE-I64-NEXT: ldrh r0, [lr, #116]
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: add lr, sp, #256
+; LE-I64-NEXT: vorr q5, q6, q6
+; LE-I64-NEXT: mov r4, r0
+; LE-I64-NEXT: ldrh r0, [lr, #112]
+; LE-I64-NEXT: vmov.32 d11[1], r8
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r4
+; LE-I64-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r6
+; LE-I64-NEXT: add lr, sp, #216
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r5
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r7
+; LE-I64-NEXT: add lr, sp, #232
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r10
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #256
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: ldrh r0, [lr, #140]
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d10[1], r5
+; LE-I64-NEXT: add lr, sp, #32
+; LE-I64-NEXT: vmov s16, r0
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #256
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: ldrh r1, [lr, #128]
+; LE-I64-NEXT: mov r0, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s16
+; LE-I64-NEXT: vmov s18, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #256
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
+; LE-I64-NEXT: ldrh r0, [lr, #136]
+; LE-I64-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEXT: vmov.32 d11[0], r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s18
+; LE-I64-NEXT: vmov s16, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s16
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d11[1], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d13[1], r5
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d12[1], r9
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: vmov r0, s19
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #232
+; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d13[1], r8
+; LE-I64-NEXT: vmov.32 d12[1], r4
+; LE-I64-NEXT: vmov.32 d10[1], r6
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #216
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vmov.32 d17[1], r2
+; LE-I64-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d14[1], r1
+; LE-I64-NEXT: add r1, r11, #192
+; LE-I64-NEXT: vmov.32 d16[1], r2
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #32
+; LE-I64-NEXT: vst1.64 {d10, d11}, [r1:128]!
+; LE-I64-NEXT: vst1.64 {d14, d15}, [r1:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #16
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r1:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r1:128]
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; LE-I64-NEXT: vmov r0, s17
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vldr s0, [sp, #76] @ 4-byte Reload
+; LE-I64-NEXT: mov r10, r0
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vldr s0, [sp, #100] @ 4-byte Reload
+; LE-I64-NEXT: mov r4, r0
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vldr s0, [sp, #108] @ 4-byte Reload
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vldr s0, [sp, #112] @ 4-byte Reload
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r6
+; LE-I64-NEXT: str r1, [sp, #112] @ 4-byte Spill
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r5
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r7
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r4
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r10
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vldr s0, [sp, #116] @ 4-byte Reload
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; LE-I64-NEXT: vmov.32 d11[1], r5
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d13[1], r4
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vldr s0, [sp, #120] @ 4-byte Reload
+; LE-I64-NEXT: mov r4, r0
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d9[1], r8
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vldr s0, [sp, #136] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d10[0], r4
+; LE-I64-NEXT: vmov r7, s0
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: add lr, sp, #136
+; LE-I64-NEXT: add r10, r11, #128
+; LE-I64-NEXT: mov r0, r7
+; LE-I64-NEXT: vmov.32 d10[1], r5
+; LE-I64-NEXT: vmov.32 d12[1], r1
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #120
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: add lr, sp, #80
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vmov.32 d13[0], r6
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r10:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r10:128]!
+; LE-I64-NEXT: vldr s0, [sp, #156] @ 4-byte Reload
+; LE-I64-NEXT: vmov r4, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vldr s0, [sp, #160] @ 4-byte Reload
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: ldr r0, [sp, #52] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d8[1], r9
+; LE-I64-NEXT: vmov r7, s0
+; LE-I64-NEXT: add lr, sp, #160
+; LE-I64-NEXT: vldr s0, [sp, #176] @ 4-byte Reload
+; LE-I64-NEXT: vmov s20, r0
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s20
+; LE-I64-NEXT: vmov s18, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s18
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: vmov s16, r5
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: mov r0, r7
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s16
+; LE-I64-NEXT: vmov s18, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s18
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: add lr, sp, #176
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vmov.32 d11[1], r5
+; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: mov r0, r4
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vldr s0, [sp, #196] @ 4-byte Reload
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov.32 d10[1], r6
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vldr s0, [sp, #192] @ 4-byte Reload
+; LE-I64-NEXT: vmov s16, r0
+; LE-I64-NEXT: vmov.32 d13[1], r5
+; LE-I64-NEXT: vmov r6, s0
+; LE-I64-NEXT: vldr s0, [sp, #200] @ 4-byte Reload
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s16
+; LE-I64-NEXT: vmov s18, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s18
+; LE-I64-NEXT: add lr, sp, #200
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov s16, r7
+; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: mov r0, r6
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s16
+; LE-I64-NEXT: vmov s18, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s18
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: vmov.32 d12[1], r5
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #200
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #112] @ 4-byte Reload
+; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #176
+; LE-I64-NEXT: vmov.32 d19[1], r4
+; LE-I64-NEXT: vmov.32 d18[1], r0
+; LE-I64-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #216
+; LE-I64-NEXT: vmov.32 d17[1], r0
+; LE-I64-NEXT: add r0, r11, #64
+; LE-I64-NEXT: vmov.32 d16[1], r8
+; LE-I64-NEXT: vorr q10, q8, q8
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #232
+; LE-I64-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r10:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #160
+; LE-I64-NEXT: vmov.32 d14[1], r1
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r10:128]
+; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d20, d21}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #120
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEXT: vst1.64 {d14, d15}, [r11:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #136
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]
+; LE-I64-NEXT: add sp, sp, #248
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: add sp, sp, #4
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v32f16:
+; LE-I32-NEON: @ %bb.0:
+; LE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEON-NEXT: .pad #4
+; LE-I32-NEON-NEXT: sub sp, sp, #4
+; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: .pad #112
+; LE-I32-NEON-NEXT: sub sp, sp, #112
+; LE-I32-NEON-NEXT: mov r4, r0
+; LE-I32-NEON-NEXT: vmov r0, s3
+; LE-I32-NEON-NEXT: vmov.f32 s19, s15
+; LE-I32-NEON-NEXT: vstr s14, [sp, #100] @ 4-byte Spill
+; LE-I32-NEON-NEXT: vmov.f32 s21, s13
+; LE-I32-NEON-NEXT: vstr s12, [sp, #104] @ 4-byte Spill
+; LE-I32-NEON-NEXT: vmov.f32 s23, s11
+; LE-I32-NEON-NEXT: vstr s10, [sp, #96] @ 4-byte Spill
+; LE-I32-NEON-NEXT: vmov.f32 s25, s9
+; LE-I32-NEON-NEXT: vstr s8, [sp, #92] @ 4-byte Spill
+; LE-I32-NEON-NEXT: vmov.f32 s27, s7
+; LE-I32-NEON-NEXT: vstr s6, [sp, #88] @ 4-byte Spill
+; LE-I32-NEON-NEXT: vmov.f32 s29, s5
+; LE-I32-NEON-NEXT: vstr s4, [sp, #84] @ 4-byte Spill
+; LE-I32-NEON-NEXT: vstr s2, [sp, #80] @ 4-byte Spill
+; LE-I32-NEON-NEXT: vstr s1, [sp, #108] @ 4-byte Spill
+; LE-I32-NEON-NEXT: vstr s0, [sp, #60] @ 4-byte Spill
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: mov r8, r0
+; LE-I32-NEON-NEXT: vmov r0, s29
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: mov r9, r0
+; LE-I32-NEON-NEXT: vmov r0, s27
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: mov r10, r0
+; LE-I32-NEON-NEXT: vmov r0, s25
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: mov r11, r0
+; LE-I32-NEON-NEXT: vmov r0, s23
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: mov r6, r0
+; LE-I32-NEON-NEXT: vmov r0, s21
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: mov r7, r0
+; LE-I32-NEON-NEXT: vmov r0, s19
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: mov r5, r0
+; LE-I32-NEON-NEXT: ldrh r0, [sp, #216]
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT: ldrh r0, [sp, #224]
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT: ldrh r0, [sp, #232]
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT: ldrh r0, [sp, #240]
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT: ldrh r0, [sp, #248]
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: add lr, sp, #256
+; LE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEON-NEXT: ldrh r0, [lr]
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: add lr, sp, #256
+; LE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEON-NEXT: ldrh r0, [lr, #8]
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: add lr, sp, #256
+; LE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT: ldrh r0, [lr, #16]
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: add lr, sp, #256
+; LE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT: ldrh r0, [lr, #20]
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: add lr, sp, #256
+; LE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT: ldrh r0, [lr, #12]
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEON-NEXT: add lr, sp, #64
+; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEON-NEXT: add lr, sp, #256
+; LE-I32-NEON-NEXT: ldrh r0, [lr, #4]
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT: ldrh r0, [sp, #252]
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT: ldrh r0, [sp, #244]
+; LE-I32-NEON-NEXT: add lr, sp, #40
+; LE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT: ldrh r0, [sp, #236]
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT: ldrh r0, [sp, #228]
+; LE-I32-NEON-NEXT: add lr, sp, #24
+; LE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT: ldrh r0, [sp, #220]
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vldr s0, [sp, #60] @ 4-byte Reload
+; LE-I32-NEON-NEXT: add lr, sp, #8
+; LE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT: vmov r0, s0
+; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vldr s0, [sp, #80] @ 4-byte Reload
+; LE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT: vmov r0, s0
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vldr s0, [sp, #84] @ 4-byte Reload
+; LE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT: vmov r0, s0
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vldr s0, [sp, #88] @ 4-byte Reload
+; LE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEON-NEXT: vmov r0, s0
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vldr s0, [sp, #92] @ 4-byte Reload
+; LE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEON-NEXT: vmov r0, s0
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vldr s0, [sp, #96] @ 4-byte Reload
+; LE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT: vmov r0, s0
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vldr s0, [sp, #100] @ 4-byte Reload
+; LE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT: vmov r0, s0
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; LE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT: vmov r0, s0
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov s0, r5
+; LE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov s0, r7
+; LE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov s0, r6
+; LE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov s0, r11
+; LE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov s0, r10
+; LE-I32-NEON-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vmov s0, r9
+; LE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: vldr s0, [sp, #108] @ 4-byte Reload
+; LE-I32-NEON-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT: vmov r0, s0
+; LE-I32-NEON-NEXT: bl __aeabi_h2f
+; LE-I32-NEON-NEXT: vmov s0, r0
+; LE-I32-NEON-NEXT: vmov.32 d11[1], r8
+; LE-I32-NEON-NEXT: bl lrintf
+; LE-I32-NEON-NEXT: add lr, sp, #8
+; LE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT: add r0, r4, #64
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: add lr, sp, #24
+; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: add lr, sp, #40
+; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: add lr, sp, #64
+; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]
+; LE-I32-NEON-NEXT: vst1.32 {d10, d11}, [r4:128]!
+; LE-I32-NEON-NEXT: vst1.32 {d12, d13}, [r4:128]!
+; LE-I32-NEON-NEXT: vst1.32 {d14, d15}, [r4:128]!
+; LE-I32-NEON-NEXT: vst1.64 {d8, d9}, [r4:128]
+; LE-I32-NEON-NEXT: add sp, sp, #112
+; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT: add sp, sp, #4
+; LE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v32f16:
+; LE-I64-NEON: @ %bb.0:
+; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT: .pad #4
+; LE-I64-NEON-NEXT: sub sp, sp, #4
+; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: .pad #248
+; LE-I64-NEON-NEXT: sub sp, sp, #248
+; LE-I64-NEON-NEXT: add lr, sp, #256
+; LE-I64-NEON-NEXT: mov r11, r0
+; LE-I64-NEON-NEXT: vstr s15, [sp, #176] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vmov.f32 s19, s14
+; LE-I64-NEON-NEXT: ldrh r0, [lr, #132]
+; LE-I64-NEON-NEXT: vmov.f32 s17, s11
+; LE-I64-NEON-NEXT: vstr s13, [sp, #196] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vstr s12, [sp, #112] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vstr s10, [sp, #136] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vstr s9, [sp, #160] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vstr s8, [sp, #200] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vstr s7, [sp, #100] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vstr s6, [sp, #116] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vstr s5, [sp, #76] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vstr s4, [sp, #120] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vstr s3, [sp, #156] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vstr s2, [sp, #192] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vstr s1, [sp, #104] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vstr s0, [sp, #108] @ 4-byte Spill
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: add lr, sp, #256
+; LE-I64-NEON-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; LE-I64-NEON-NEXT: str r1, [sp, #56] @ 4-byte Spill
+; LE-I64-NEON-NEXT: ldrh r0, [lr, #108]
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: add lr, sp, #256
+; LE-I64-NEON-NEXT: str r0, [sp, #32] @ 4-byte Spill
+; LE-I64-NEON-NEXT: ldrh r0, [lr, #96]
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: add lr, sp, #256
+; LE-I64-NEON-NEXT: mov r5, r0
+; LE-I64-NEON-NEXT: ldrh r0, [lr, #100]
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: add lr, sp, #256
+; LE-I64-NEON-NEXT: mov r7, r0
+; LE-I64-NEON-NEXT: ldrh r0, [lr, #156]
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: add lr, sp, #256
+; LE-I64-NEON-NEXT: mov r6, r0
+; LE-I64-NEON-NEXT: ldrh r0, [lr, #152]
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: add lr, sp, #256
+; LE-I64-NEON-NEXT: mov r4, r0
+; LE-I64-NEON-NEXT: ldrh r0, [lr, #148]
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r4
+; LE-I64-NEON-NEXT: mov r8, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r6
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r7
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r5
+; LE-I64-NEON-NEXT: mov r7, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: add lr, sp, #256
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: ldrh r0, [lr, #144]
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: add lr, sp, #256
+; LE-I64-NEON-NEXT: mov r10, r0
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r7
+; LE-I64-NEON-NEXT: ldrh r0, [lr, #104]
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r5
+; LE-I64-NEON-NEXT: add lr, sp, #80
+; LE-I64-NEON-NEXT: mov r7, r0
+; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #256
+; LE-I64-NEON-NEXT: ldrh r0, [lr, #124]
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: add lr, sp, #256
+; LE-I64-NEON-NEXT: mov r5, r0
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEON-NEXT: ldrh r0, [lr, #120]
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r4
+; LE-I64-NEON-NEXT: add lr, sp, #16
+; LE-I64-NEON-NEXT: mov r6, r0
+; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #256
+; LE-I64-NEON-NEXT: ldrh r0, [lr, #116]
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: add lr, sp, #256
+; LE-I64-NEON-NEXT: vorr q5, q6, q6
+; LE-I64-NEON-NEXT: mov r4, r0
+; LE-I64-NEON-NEXT: ldrh r0, [lr, #112]
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r8
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r4
+; LE-I64-NEON-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r6
+; LE-I64-NEON-NEXT: add lr, sp, #216
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r5
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r7
+; LE-I64-NEON-NEXT: add lr, sp, #232
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: mov r8, r1
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r10
+; LE-I64-NEON-NEXT: mov r9, r1
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: add lr, sp, #256
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: ldrh r0, [lr, #140]
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r5
+; LE-I64-NEON-NEXT: add lr, sp, #32
+; LE-I64-NEON-NEXT: vmov s16, r0
+; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: add lr, sp, #256
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: ldrh r1, [lr, #128]
+; LE-I64-NEON-NEXT: mov r0, r1
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov.f32 s0, s16
+; LE-I64-NEON-NEXT: vmov s18, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: add lr, sp, #256
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
+; LE-I64-NEON-NEXT: ldrh r0, [lr, #136]
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r1
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov.f32 s0, s18
+; LE-I64-NEON-NEXT: vmov s16, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s16
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r5
+; LE-I64-NEON-NEXT: add lr, sp, #56
+; LE-I64-NEON-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r9
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: vmov r0, s19
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #232
+; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r8
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r4
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r6
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #216
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d17[1], r2
+; LE-I64-NEON-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r1
+; LE-I64-NEON-NEXT: add r1, r11, #192
+; LE-I64-NEON-NEXT: vmov.32 d16[1], r2
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #32
+; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r1:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r1:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #16
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r1:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r1:128]
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vmov r0, s17
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vldr s0, [sp, #76] @ 4-byte Reload
+; LE-I64-NEON-NEXT: mov r10, r0
+; LE-I64-NEON-NEXT: vmov r0, s0
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vldr s0, [sp, #100] @ 4-byte Reload
+; LE-I64-NEON-NEXT: mov r4, r0
+; LE-I64-NEON-NEXT: vmov r0, s0
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; LE-I64-NEON-NEXT: mov r7, r0
+; LE-I64-NEON-NEXT: vmov r0, s0
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vldr s0, [sp, #108] @ 4-byte Reload
+; LE-I64-NEON-NEXT: mov r5, r0
+; LE-I64-NEON-NEXT: vmov r0, s0
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vldr s0, [sp, #112] @ 4-byte Reload
+; LE-I64-NEON-NEXT: mov r6, r0
+; LE-I64-NEON-NEXT: vmov r0, s0
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r6
+; LE-I64-NEON-NEXT: str r1, [sp, #112] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r5
+; LE-I64-NEON-NEXT: mov r9, r1
+; LE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r7
+; LE-I64-NEON-NEXT: mov r8, r1
+; LE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r4
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov s0, r10
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vldr s0, [sp, #116] @ 4-byte Reload
+; LE-I64-NEON-NEXT: mov r6, r0
+; LE-I64-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r5
+; LE-I64-NEON-NEXT: vmov r0, s0
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r4
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vldr s0, [sp, #120] @ 4-byte Reload
+; LE-I64-NEON-NEXT: mov r4, r0
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r8
+; LE-I64-NEON-NEXT: vmov r0, s0
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vldr s0, [sp, #136] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r4
+; LE-I64-NEON-NEXT: vmov r7, s0
+; LE-I64-NEON-NEXT: vmov s0, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #136
+; LE-I64-NEON-NEXT: add r10, r11, #128
+; LE-I64-NEON-NEXT: mov r0, r7
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r5
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r1
+; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #120
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: add lr, sp, #80
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #56
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r6
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]!
+; LE-I64-NEON-NEXT: vldr s0, [sp, #156] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov r4, s0
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vldr s0, [sp, #160] @ 4-byte Reload
+; LE-I64-NEON-NEXT: mov r5, r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #52] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov.32 d8[1], r9
+; LE-I64-NEON-NEXT: vmov r7, s0
+; LE-I64-NEON-NEXT: add lr, sp, #160
+; LE-I64-NEON-NEXT: vldr s0, [sp, #176] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov s20, r0
+; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT: vmov r0, s0
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov.f32 s0, s20
+; LE-I64-NEON-NEXT: vmov s18, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s18
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT: vmov s16, r5
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT: mov r0, r7
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov.f32 s0, s16
+; LE-I64-NEON-NEXT: vmov s18, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s18
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: add lr, sp, #176
+; LE-I64-NEON-NEXT: mov r8, r1
+; LE-I64-NEON-NEXT: vmov.32 d11[1], r5
+; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT: mov r0, r4
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vldr s0, [sp, #196] @ 4-byte Reload
+; LE-I64-NEON-NEXT: mov r7, r0
+; LE-I64-NEON-NEXT: vmov.32 d10[1], r6
+; LE-I64-NEON-NEXT: vmov r0, s0
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vldr s0, [sp, #192] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov s16, r0
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r5
+; LE-I64-NEON-NEXT: vmov r6, s0
+; LE-I64-NEON-NEXT: vldr s0, [sp, #200] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vmov r0, s0
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov.f32 s0, s16
+; LE-I64-NEON-NEXT: vmov s18, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s18
+; LE-I64-NEON-NEXT: add lr, sp, #200
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: mov r4, r1
+; LE-I64-NEON-NEXT: vmov s16, r7
+; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT: mov r0, r6
+; LE-I64-NEON-NEXT: mov r5, r1
+; LE-I64-NEON-NEXT: bl __aeabi_h2f
+; LE-I64-NEON-NEXT: vmov.f32 s0, s16
+; LE-I64-NEON-NEXT: vmov s18, r0
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: vmov.f32 s0, s18
+; LE-I64-NEON-NEXT: mov r6, r1
+; LE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT: vmov.32 d12[1], r5
+; LE-I64-NEON-NEXT: bl lrintf
+; LE-I64-NEON-NEXT: add lr, sp, #200
+; LE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #112] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #176
+; LE-I64-NEON-NEXT: vmov.32 d19[1], r4
+; LE-I64-NEON-NEXT: vmov.32 d18[1], r0
+; LE-I64-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #216
+; LE-I64-NEON-NEXT: vmov.32 d17[1], r0
+; LE-I64-NEON-NEXT: add r0, r11, #64
+; LE-I64-NEON-NEXT: vmov.32 d16[1], r8
+; LE-I64-NEON-NEXT: vorr q10, q8, q8
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #232
+; LE-I64-NEON-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #160
+; LE-I64-NEON-NEXT: vmov.32 d14[1], r1
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]
+; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #120
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r11:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: add lr, sp, #136
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]
+; LE-I64-NEON-NEXT: add sp, sp, #248
+; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT: add sp, sp, #4
+; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v32f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEXT: .pad #4
+; BE-I32-NEXT: sub sp, sp, #4
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: .pad #128
+; BE-I32-NEXT: sub sp, sp, #128
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: vmov r0, s13
+; BE-I32-NEXT: vmov.f32 s19, s15
+; BE-I32-NEXT: vstr s14, [sp, #84] @ 4-byte Spill
+; BE-I32-NEXT: vstr s12, [sp, #80] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s21, s11
+; BE-I32-NEXT: vstr s10, [sp, #100] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s23, s9
+; BE-I32-NEXT: vstr s8, [sp, #104] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s25, s7
+; BE-I32-NEXT: vstr s6, [sp, #96] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s27, s5
+; BE-I32-NEXT: vstr s4, [sp, #92] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s29, s3
+; BE-I32-NEXT: vstr s2, [sp, #88] @ 4-byte Spill
+; BE-I32-NEXT: vstr s1, [sp, #124] @ 4-byte Spill
+; BE-I32-NEXT: vstr s0, [sp, #60] @ 4-byte Spill
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: mov r8, r0
+; BE-I32-NEXT: vmov r0, s19
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r9, r0
+; BE-I32-NEXT: vmov r0, s29
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r10, r0
+; BE-I32-NEXT: vmov r0, s27
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r11, r0
+; BE-I32-NEXT: vmov r0, s25
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r6, r0
+; BE-I32-NEXT: vmov r0, s23
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r7, r0
+; BE-I32-NEXT: vmov r0, s21
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: add lr, sp, #256
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: ldrh r0, [lr, #26]
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: add lr, sp, #256
+; BE-I32-NEXT: vmov.32 d14[0], r0
+; BE-I32-NEXT: ldrh r0, [lr, #34]
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEXT: ldrh r0, [sp, #234]
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: ldrh r0, [sp, #242]
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEXT: ldrh r0, [sp, #250]
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: add lr, sp, #256
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: ldrh r0, [lr, #2]
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: add lr, sp, #256
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: ldrh r0, [lr, #10]
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: add lr, sp, #256
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: ldrh r0, [lr, #18]
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: add lr, sp, #256
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: ldrh r0, [lr, #22]
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: add lr, sp, #256
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: ldrh r0, [lr, #14]
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEXT: add lr, sp, #64
+; BE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #256
+; BE-I32-NEXT: ldrh r0, [lr, #6]
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: ldrh r0, [sp, #254]
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: ldrh r0, [sp, #246]
+; BE-I32-NEXT: add lr, sp, #40
+; BE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEXT: ldrh r0, [sp, #238]
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: add lr, sp, #24
+; BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEXT: add lr, sp, #256
+; BE-I32-NEXT: ldrh r0, [lr, #38]
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: add lr, sp, #256
+; BE-I32-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEXT: ldrh r0, [lr, #30]
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #60] @ 4-byte Reload
+; BE-I32-NEXT: add lr, sp, #8
+; BE-I32-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #80] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #84] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #88] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #92] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #96] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d14[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #100] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: add lr, sp, #8
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT: add lr, sp, #104
+; BE-I32-NEXT: vrev64.32 q8, q8
+; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r5
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r7
+; BE-I32-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r6
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r11
+; BE-I32-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r10
+; BE-I32-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r9
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #124] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: vmov.32 d12[1], r8
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: add lr, sp, #24
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: add r0, r4, #64
+; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT: add lr, sp, #40
+; BE-I32-NEXT: vrev64.32 q8, q6
+; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT: add lr, sp, #64
+; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT: add lr, sp, #104
+; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT: vst1.64 {d18, d19}, [r0:128]
+; BE-I32-NEXT: vst1.32 {d10, d11}, [r4:128]!
+; BE-I32-NEXT: vst1.32 {d14, d15}, [r4:128]!
+; BE-I32-NEXT: vst1.32 {d8, d9}, [r4:128]!
+; BE-I32-NEXT: vst1.64 {d16, d17}, [r4:128]
+; BE-I32-NEXT: add sp, sp, #128
+; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: add sp, sp, #4
+; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I64-LABEL: lrint_v32f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: .pad #4
+; BE-I64-NEXT: sub sp, sp, #4
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: .pad #176
+; BE-I64-NEXT: sub sp, sp, #176
+; BE-I64-NEXT: add lr, sp, #256
+; BE-I64-NEXT: mov r10, r0
+; BE-I64-NEXT: vstr s15, [sp, #112] @ 4-byte Spill
+; BE-I64-NEXT: ldrh r0, [lr, #74]
+; BE-I64-NEXT: vstr s14, [sp, #80] @ 4-byte Spill
+; BE-I64-NEXT: vstr s13, [sp, #48] @ 4-byte Spill
+; BE-I64-NEXT: vstr s12, [sp, #148] @ 4-byte Spill
+; BE-I64-NEXT: vstr s11, [sp, #76] @ 4-byte Spill
+; BE-I64-NEXT: vstr s10, [sp, #152] @ 4-byte Spill
+; BE-I64-NEXT: vstr s9, [sp, #156] @ 4-byte Spill
+; BE-I64-NEXT: vstr s8, [sp, #120] @ 4-byte Spill
+; BE-I64-NEXT: vstr s7, [sp, #136] @ 4-byte Spill
+; BE-I64-NEXT: vstr s6, [sp, #132] @ 4-byte Spill
+; BE-I64-NEXT: vstr s5, [sp, #144] @ 4-byte Spill
+; BE-I64-NEXT: vstr s4, [sp, #64] @ 4-byte Spill
+; BE-I64-NEXT: vstr s3, [sp, #104] @ 4-byte Spill
+; BE-I64-NEXT: vstr s2, [sp, #88] @ 4-byte Spill
+; BE-I64-NEXT: vstr s1, [sp, #56] @ 4-byte Spill
+; BE-I64-NEXT: vstr s0, [sp, #96] @ 4-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: add lr, sp, #256
+; BE-I64-NEXT: mov r9, r0
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: ldrh r0, [lr, #62]
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: add lr, sp, #256
+; BE-I64-NEXT: mov r6, r0
+; BE-I64-NEXT: ldrh r0, [lr, #58]
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: add lr, sp, #256
+; BE-I64-NEXT: mov r7, r0
+; BE-I64-NEXT: ldrh r0, [lr, #66]
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: add lr, sp, #256
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: ldrh r0, [lr, #54]
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: add lr, sp, #256
+; BE-I64-NEXT: mov r5, r0
+; BE-I64-NEXT: ldrh r0, [lr, #50]
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s0, r5
+; BE-I64-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; BE-I64-NEXT: vstr d16, [sp, #168] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s0, r4
+; BE-I64-NEXT: str r1, [sp, #40] @ 4-byte Spill
+; BE-I64-NEXT: vstr d16, [sp, #160] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s0, r7
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vstr d16, [sp, #32] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s0, r6
+; BE-I64-NEXT: mov r11, r1
+; BE-I64-NEXT: vstr d16, [sp, #24] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: add lr, sp, #256
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: ldrh r0, [lr, #34]
+; BE-I64-NEXT: vstr d16, [sp, #16] @ 8-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d8[0], r9
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: add lr, sp, #256
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: ldrh r1, [lr, #38]
+; BE-I64-NEXT: mov r0, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.32 d8[1], r8
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vstr d8, [sp, #8] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: add lr, sp, #256
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: ldrh r1, [lr, #26]
+; BE-I64-NEXT: mov r0, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d12[1], r7
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: add lr, sp, #256
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: ldrh r1, [lr, #30]
+; BE-I64-NEXT: mov r0, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d13[1], r5
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: add lr, sp, #256
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: ldrh r1, [lr, #78]
+; BE-I64-NEXT: mov r0, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d9[1], r7
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: add lr, sp, #256
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: ldrh r1, [lr, #82]
+; BE-I64-NEXT: mov r0, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d15[1], r5
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: add lr, sp, #256
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEXT: ldrh r1, [lr, #86]
+; BE-I64-NEXT: mov r0, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d14[1], r7
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: add lr, sp, #256
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: ldrh r1, [lr, #70]
+; BE-I64-NEXT: mov r0, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d8[1], r5
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: add lr, sp, #256
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: ldrh r1, [lr, #46]
+; BE-I64-NEXT: mov r0, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d10[1], r7
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d25[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; BE-I64-NEXT: ldr r2, [sp, #44] @ 4-byte Reload
+; BE-I64-NEXT: vldr d24, [sp, #160] @ 8-byte Reload
+; BE-I64-NEXT: vldr s0, [sp, #48] @ 4-byte Reload
+; BE-I64-NEXT: vmov.32 d24[1], r0
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: vldr d26, [sp, #16] @ 8-byte Reload
+; BE-I64-NEXT: vstr d24, [sp, #160] @ 8-byte Spill
+; BE-I64-NEXT: vldr d24, [sp, #8] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d23, d14
+; BE-I64-NEXT: vldr d29, [sp, #24] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d22, d24
+; BE-I64-NEXT: vldr d24, [sp, #168] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d26[1], r6
+; BE-I64-NEXT: vldr d28, [sp, #32] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d25[1], r1
+; BE-I64-NEXT: add r1, r10, #192
+; BE-I64-NEXT: vmov.32 d29[1], r11
+; BE-I64-NEXT: add r11, r10, #128
+; BE-I64-NEXT: vmov.32 d24[1], r2
+; BE-I64-NEXT: vmov.32 d11[1], r5
+; BE-I64-NEXT: vmov.32 d28[1], r4
+; BE-I64-NEXT: vrev64.32 d27, d26
+; BE-I64-NEXT: vstr d24, [sp, #168] @ 8-byte Spill
+; BE-I64-NEXT: vstr d25, [sp, #48] @ 8-byte Spill
+; BE-I64-NEXT: vrev64.32 d25, d11
+; BE-I64-NEXT: vrev64.32 d26, d29
+; BE-I64-NEXT: vrev64.32 d24, d28
+; BE-I64-NEXT: vst1.64 {d26, d27}, [r1:128]!
+; BE-I64-NEXT: vst1.64 {d24, d25}, [r1:128]!
+; BE-I64-NEXT: vrev64.32 d21, d10
+; BE-I64-NEXT: vrev64.32 d19, d15
+; BE-I64-NEXT: vrev64.32 d17, d13
+; BE-I64-NEXT: vrev64.32 d20, d8
+; BE-I64-NEXT: vst1.64 {d22, d23}, [r1:128]!
+; BE-I64-NEXT: vrev64.32 d18, d9
+; BE-I64-NEXT: vrev64.32 d16, d12
+; BE-I64-NEXT: vst1.64 {d20, d21}, [r1:128]
+; BE-I64-NEXT: vst1.64 {d18, d19}, [r11:128]!
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: add lr, sp, #256
+; BE-I64-NEXT: mov r7, r0
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: ldrh r0, [lr, #42]
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vldr s0, [sp, #56] @ 4-byte Reload
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r4
+; BE-I64-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vldr s0, [sp, #64] @ 4-byte Reload
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov r2, s0
+; BE-I64-NEXT: vldr s0, [sp, #80] @ 4-byte Reload
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vmov r4, s0
+; BE-I64-NEXT: vldr s0, [sp, #76] @ 4-byte Reload
+; BE-I64-NEXT: vstr d16, [sp, #80] @ 8-byte Spill
+; BE-I64-NEXT: vmov r5, s0
+; BE-I64-NEXT: mov r0, r2
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: mov r0, r4
+; BE-I64-NEXT: mov r9, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d8[0], r7
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: mov r0, r5
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vldr s0, [sp, #88] @ 4-byte Reload
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov.32 d8[1], r8
+; BE-I64-NEXT: vmov r7, s0
+; BE-I64-NEXT: vldr s0, [sp, #96] @ 4-byte Reload
+; BE-I64-NEXT: vstr d8, [sp, #88] @ 8-byte Spill
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; BE-I64-NEXT: vmov s19, r0
+; BE-I64-NEXT: vmov.32 d12[1], r6
+; BE-I64-NEXT: vmov r5, s0
+; BE-I64-NEXT: vldr s0, [sp, #112] @ 4-byte Reload
+; BE-I64-NEXT: vstr d12, [sp, #104] @ 8-byte Spill
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s19
+; BE-I64-NEXT: vmov s30, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s30
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: vmov s17, r4
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: mov r0, r5
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s17
+; BE-I64-NEXT: vmov s30, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s30
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; BE-I64-NEXT: vmov.32 d12[1], r4
+; BE-I64-NEXT: vstr d16, [sp, #64] @ 8-byte Spill
+; BE-I64-NEXT: vstr d12, [sp, #112] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: mov r0, r7
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vmov.32 d9[1], r6
+; BE-I64-NEXT: vstr d16, [sp, #56] @ 8-byte Spill
+; BE-I64-NEXT: vstr d9, [sp, #96] @ 8-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vldr s0, [sp, #120] @ 4-byte Reload
+; BE-I64-NEXT: mov r5, r0
+; BE-I64-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; BE-I64-NEXT: vmov r7, s0
+; BE-I64-NEXT: vldr s0, [sp, #132] @ 4-byte Reload
+; BE-I64-NEXT: vmov.32 d10[1], r0
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: vstr d10, [sp, #120] @ 8-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vldr s0, [sp, #136] @ 4-byte Reload
+; BE-I64-NEXT: vmov s26, r0
+; BE-I64-NEXT: vmov.32 d11[1], r9
+; BE-I64-NEXT: vmov r4, s0
+; BE-I64-NEXT: vldr s0, [sp, #144] @ 4-byte Reload
+; BE-I64-NEXT: vstr d11, [sp, #136] @ 8-byte Spill
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s26
+; BE-I64-NEXT: vmov s22, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s22
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: vmov s24, r5
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: mov r0, r4
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s24
+; BE-I64-NEXT: vmov s22, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s22
+; BE-I64-NEXT: mov r9, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: vmov.32 d14[1], r5
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: mov r0, r7
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vldr s0, [sp, #148] @ 4-byte Reload
+; BE-I64-NEXT: mov r7, r0
+; BE-I64-NEXT: vmov.32 d13[1], r6
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vldr s0, [sp, #152] @ 4-byte Reload
+; BE-I64-NEXT: vmov s20, r0
+; BE-I64-NEXT: vmov.32 d11[1], r5
+; BE-I64-NEXT: vmov r4, s0
+; BE-I64-NEXT: vldr s0, [sp, #156] @ 4-byte Reload
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s20
+; BE-I64-NEXT: vmov s16, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: vmov s18, r7
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: mov r0, r4
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s18
+; BE-I64-NEXT: vmov s16, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: vmov.32 d15[1], r5
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vldr d16, [sp, #160] @ 8-byte Reload
+; BE-I64-NEXT: vldr d20, [sp, #136] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d19, d14
+; BE-I64-NEXT: vrev64.32 d31, d16
+; BE-I64-NEXT: vldr d16, [sp, #168] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d18, d20
+; BE-I64-NEXT: vldr d20, [sp, #120] @ 8-byte Reload
+; BE-I64-NEXT: vldr d22, [sp, #96] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d28[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; BE-I64-NEXT: vrev64.32 d21, d20
+; BE-I64-NEXT: vrev64.32 d30, d16
+; BE-I64-NEXT: vldr d16, [sp, #48] @ 8-byte Reload
+; BE-I64-NEXT: vldr d23, [sp, #64] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d20, d22
+; BE-I64-NEXT: vldr d22, [sp, #112] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d1, d16
+; BE-I64-NEXT: vldr d16, [sp, #80] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d23[1], r0
+; BE-I64-NEXT: add r0, r10, #64
+; BE-I64-NEXT: vrev64.32 d25, d22
+; BE-I64-NEXT: vldr d22, [sp, #104] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEXT: vrev64.32 d0, d16
+; BE-I64-NEXT: vmov.32 d28[1], r1
+; BE-I64-NEXT: vldr d29, [sp, #56] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d3, d15
+; BE-I64-NEXT: vrev64.32 d24, d22
+; BE-I64-NEXT: vldr d22, [sp, #88] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d10[1], r6
+; BE-I64-NEXT: vrev64.32 d5, d23
+; BE-I64-NEXT: vst1.64 {d0, d1}, [r11:128]!
+; BE-I64-NEXT: vrev64.32 d2, d9
+; BE-I64-NEXT: vrev64.32 d27, d22
+; BE-I64-NEXT: vmov.32 d29[1], r8
+; BE-I64-NEXT: vrev64.32 d4, d28
+; BE-I64-NEXT: vst1.64 {d30, d31}, [r11:128]
+; BE-I64-NEXT: vst1.64 {d2, d3}, [r0:128]!
+; BE-I64-NEXT: vmov.32 d12[1], r9
+; BE-I64-NEXT: vrev64.32 d26, d10
+; BE-I64-NEXT: vst1.64 {d4, d5}, [r0:128]!
+; BE-I64-NEXT: vrev64.32 d23, d29
+; BE-I64-NEXT: vst1.64 {d26, d27}, [r0:128]!
+; BE-I64-NEXT: vrev64.32 d22, d12
+; BE-I64-NEXT: vst1.64 {d24, d25}, [r0:128]
+; BE-I64-NEXT: vst1.64 {d20, d21}, [r10:128]!
+; BE-I64-NEXT: vst1.64 {d22, d23}, [r10:128]!
+; BE-I64-NEXT: vrev64.32 d17, d11
+; BE-I64-NEXT: vrev64.32 d16, d13
+; BE-I64-NEXT: vst1.64 {d18, d19}, [r10:128]!
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r10:128]
+; BE-I64-NEXT: add sp, sp, #176
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: add sp, sp, #4
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v32f16:
+; BE-I32-NEON: @ %bb.0:
+; BE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEON-NEXT: .pad #4
+; BE-I32-NEON-NEXT: sub sp, sp, #4
+; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: .pad #128
+; BE-I32-NEON-NEXT: sub sp, sp, #128
+; BE-I32-NEON-NEXT: mov r4, r0
+; BE-I32-NEON-NEXT: vmov r0, s13
+; BE-I32-NEON-NEXT: vmov.f32 s19, s15
+; BE-I32-NEON-NEXT: vstr s14, [sp, #84] @ 4-byte Spill
+; BE-I32-NEON-NEXT: vstr s12, [sp, #80] @ 4-byte Spill
+; BE-I32-NEON-NEXT: vmov.f32 s21, s11
+; BE-I32-NEON-NEXT: vstr s10, [sp, #100] @ 4-byte Spill
+; BE-I32-NEON-NEXT: vmov.f32 s23, s9
+; BE-I32-NEON-NEXT: vstr s8, [sp, #104] @ 4-byte Spill
+; BE-I32-NEON-NEXT: vmov.f32 s25, s7
+; BE-I32-NEON-NEXT: vstr s6, [sp, #96] @ 4-byte Spill
+; BE-I32-NEON-NEXT: vmov.f32 s27, s5
+; BE-I32-NEON-NEXT: vstr s4, [sp, #92] @ 4-byte Spill
+; BE-I32-NEON-NEXT: vmov.f32 s29, s3
+; BE-I32-NEON-NEXT: vstr s2, [sp, #88] @ 4-byte Spill
+; BE-I32-NEON-NEXT: vstr s1, [sp, #124] @ 4-byte Spill
+; BE-I32-NEON-NEXT: vstr s0, [sp, #60] @ 4-byte Spill
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: mov r8, r0
+; BE-I32-NEON-NEXT: vmov r0, s19
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: mov r9, r0
+; BE-I32-NEON-NEXT: vmov r0, s29
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: mov r10, r0
+; BE-I32-NEON-NEXT: vmov r0, s27
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: mov r11, r0
+; BE-I32-NEON-NEXT: vmov r0, s25
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: mov r6, r0
+; BE-I32-NEON-NEXT: vmov r0, s23
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: mov r7, r0
+; BE-I32-NEON-NEXT: vmov r0, s21
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: add lr, sp, #256
+; BE-I32-NEON-NEXT: mov r5, r0
+; BE-I32-NEON-NEXT: ldrh r0, [lr, #26]
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: add lr, sp, #256
+; BE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I32-NEON-NEXT: ldrh r0, [lr, #34]
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT: ldrh r0, [sp, #234]
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT: ldrh r0, [sp, #242]
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT: ldrh r0, [sp, #250]
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: add lr, sp, #256
+; BE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT: ldrh r0, [lr, #2]
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: add lr, sp, #256
+; BE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT: ldrh r0, [lr, #10]
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: add lr, sp, #256
+; BE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT: ldrh r0, [lr, #18]
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: add lr, sp, #256
+; BE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT: ldrh r0, [lr, #22]
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: add lr, sp, #256
+; BE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT: ldrh r0, [lr, #14]
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEON-NEXT: add lr, sp, #64
+; BE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #256
+; BE-I32-NEON-NEXT: ldrh r0, [lr, #6]
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT: ldrh r0, [sp, #254]
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT: ldrh r0, [sp, #246]
+; BE-I32-NEON-NEXT: add lr, sp, #40
+; BE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT: ldrh r0, [sp, #238]
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT: add lr, sp, #24
+; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEON-NEXT: add lr, sp, #256
+; BE-I32-NEON-NEXT: ldrh r0, [lr, #38]
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: add lr, sp, #256
+; BE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEON-NEXT: ldrh r0, [lr, #30]
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vldr s0, [sp, #60] @ 4-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #8
+; BE-I32-NEON-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEON-NEXT: vmov r0, s0
+; BE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vldr s0, [sp, #80] @ 4-byte Reload
+; BE-I32-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT: vmov r0, s0
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vldr s0, [sp, #84] @ 4-byte Reload
+; BE-I32-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT: vmov r0, s0
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vldr s0, [sp, #88] @ 4-byte Reload
+; BE-I32-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT: vmov r0, s0
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vldr s0, [sp, #92] @ 4-byte Reload
+; BE-I32-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT: vmov r0, s0
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vldr s0, [sp, #96] @ 4-byte Reload
+; BE-I32-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I32-NEON-NEXT: vmov r0, s0
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vldr s0, [sp, #100] @ 4-byte Reload
+; BE-I32-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT: vmov r0, s0
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; BE-I32-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT: vmov r0, s0
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: add lr, sp, #8
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #104
+; BE-I32-NEON-NEXT: vrev64.32 q8, q8
+; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov s0, r5
+; BE-I32-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov s0, r7
+; BE-I32-NEON-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov s0, r6
+; BE-I32-NEON-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov s0, r11
+; BE-I32-NEON-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov s0, r10
+; BE-I32-NEON-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vmov s0, r9
+; BE-I32-NEON-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: vldr s0, [sp, #124] @ 4-byte Reload
+; BE-I32-NEON-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT: vmov r0, s0
+; BE-I32-NEON-NEXT: bl __aeabi_h2f
+; BE-I32-NEON-NEXT: vmov s0, r0
+; BE-I32-NEON-NEXT: vmov.32 d12[1], r8
+; BE-I32-NEON-NEXT: bl lrintf
+; BE-I32-NEON-NEXT: add lr, sp, #24
+; BE-I32-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT: add r0, r4, #64
+; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #40
+; BE-I32-NEON-NEXT: vrev64.32 q8, q6
+; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #64
+; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT: add lr, sp, #104
+; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]
+; BE-I32-NEON-NEXT: vst1.32 {d10, d11}, [r4:128]!
+; BE-I32-NEON-NEXT: vst1.32 {d14, d15}, [r4:128]!
+; BE-I32-NEON-NEXT: vst1.32 {d8, d9}, [r4:128]!
+; BE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]
+; BE-I32-NEON-NEXT: add sp, sp, #128
+; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT: add sp, sp, #4
+; BE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v32f16:
+; BE-I64-NEON: @ %bb.0:
+; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT: .pad #4
+; BE-I64-NEON-NEXT: sub sp, sp, #4
+; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: .pad #176
+; BE-I64-NEON-NEXT: sub sp, sp, #176
+; BE-I64-NEON-NEXT: add lr, sp, #256
+; BE-I64-NEON-NEXT: mov r10, r0
+; BE-I64-NEON-NEXT: vstr s15, [sp, #112] @ 4-byte Spill
+; BE-I64-NEON-NEXT: ldrh r0, [lr, #74]
+; BE-I64-NEON-NEXT: vstr s14, [sp, #80] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstr s13, [sp, #48] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstr s12, [sp, #148] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstr s11, [sp, #76] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstr s10, [sp, #152] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstr s9, [sp, #156] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstr s8, [sp, #120] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstr s7, [sp, #136] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstr s6, [sp, #132] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstr s5, [sp, #144] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstr s4, [sp, #64] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstr s3, [sp, #104] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstr s2, [sp, #88] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstr s1, [sp, #56] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstr s0, [sp, #96] @ 4-byte Spill
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: add lr, sp, #256
+; BE-I64-NEON-NEXT: mov r9, r0
+; BE-I64-NEON-NEXT: mov r8, r1
+; BE-I64-NEON-NEXT: ldrh r0, [lr, #62]
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: add lr, sp, #256
+; BE-I64-NEON-NEXT: mov r6, r0
+; BE-I64-NEON-NEXT: ldrh r0, [lr, #58]
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: add lr, sp, #256
+; BE-I64-NEON-NEXT: mov r7, r0
+; BE-I64-NEON-NEXT: ldrh r0, [lr, #66]
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: add lr, sp, #256
+; BE-I64-NEON-NEXT: mov r4, r0
+; BE-I64-NEON-NEXT: ldrh r0, [lr, #54]
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: add lr, sp, #256
+; BE-I64-NEON-NEXT: mov r5, r0
+; BE-I64-NEON-NEXT: ldrh r0, [lr, #50]
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: vmov s0, r5
+; BE-I64-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstr d16, [sp, #168] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: vmov s0, r4
+; BE-I64-NEON-NEXT: str r1, [sp, #40] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vstr d16, [sp, #160] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: vmov s0, r7
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vstr d16, [sp, #32] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: vmov s0, r6
+; BE-I64-NEON-NEXT: mov r11, r1
+; BE-I64-NEON-NEXT: vstr d16, [sp, #24] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: add lr, sp, #256
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: ldrh r0, [lr, #34]
+; BE-I64-NEON-NEXT: vstr d16, [sp, #16] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: vmov.32 d8[0], r9
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: add lr, sp, #256
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: ldrh r1, [lr, #38]
+; BE-I64-NEON-NEXT: mov r0, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r8
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: vstr d8, [sp, #8] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: add lr, sp, #256
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: ldrh r1, [lr, #26]
+; BE-I64-NEON-NEXT: mov r0, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r7
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: add lr, sp, #256
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT: ldrh r1, [lr, #30]
+; BE-I64-NEON-NEXT: mov r0, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r5
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: add lr, sp, #256
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: ldrh r1, [lr, #78]
+; BE-I64-NEON-NEXT: mov r0, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r7
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: add lr, sp, #256
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: ldrh r1, [lr, #82]
+; BE-I64-NEON-NEXT: mov r0, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r5
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: add lr, sp, #256
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT: ldrh r1, [lr, #86]
+; BE-I64-NEON-NEXT: mov r0, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r7
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: add lr, sp, #256
+; BE-I64-NEON-NEXT: mov r7, r1
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: ldrh r1, [lr, #70]
+; BE-I64-NEON-NEXT: mov r0, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r5
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: add lr, sp, #256
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: ldrh r1, [lr, #46]
+; BE-I64-NEON-NEXT: mov r0, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r7
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d25[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; BE-I64-NEON-NEXT: ldr r2, [sp, #44] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vldr d24, [sp, #160] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vldr s0, [sp, #48] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d24[1], r0
+; BE-I64-NEON-NEXT: vmov r0, s0
+; BE-I64-NEON-NEXT: vldr d26, [sp, #16] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vstr d24, [sp, #160] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vldr d24, [sp, #8] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d23, d14
+; BE-I64-NEON-NEXT: vldr d29, [sp, #24] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d22, d24
+; BE-I64-NEON-NEXT: vldr d24, [sp, #168] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d26[1], r6
+; BE-I64-NEON-NEXT: vldr d28, [sp, #32] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d25[1], r1
+; BE-I64-NEON-NEXT: add r1, r10, #192
+; BE-I64-NEON-NEXT: vmov.32 d29[1], r11
+; BE-I64-NEON-NEXT: add r11, r10, #128
+; BE-I64-NEON-NEXT: vmov.32 d24[1], r2
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r5
+; BE-I64-NEON-NEXT: vmov.32 d28[1], r4
+; BE-I64-NEON-NEXT: vrev64.32 d27, d26
+; BE-I64-NEON-NEXT: vstr d24, [sp, #168] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d25, [sp, #48] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vrev64.32 d25, d11
+; BE-I64-NEON-NEXT: vrev64.32 d26, d29
+; BE-I64-NEON-NEXT: vrev64.32 d24, d28
+; BE-I64-NEON-NEXT: vst1.64 {d26, d27}, [r1:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d24, d25}, [r1:128]!
+; BE-I64-NEON-NEXT: vrev64.32 d21, d10
+; BE-I64-NEON-NEXT: vrev64.32 d19, d15
+; BE-I64-NEON-NEXT: vrev64.32 d17, d13
+; BE-I64-NEON-NEXT: vrev64.32 d20, d8
+; BE-I64-NEON-NEXT: vst1.64 {d22, d23}, [r1:128]!
+; BE-I64-NEON-NEXT: vrev64.32 d18, d9
+; BE-I64-NEON-NEXT: vrev64.32 d16, d12
+; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r1:128]
+; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r11:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: add lr, sp, #256
+; BE-I64-NEON-NEXT: mov r7, r0
+; BE-I64-NEON-NEXT: mov r8, r1
+; BE-I64-NEON-NEXT: ldrh r0, [lr, #42]
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vldr s0, [sp, #56] @ 4-byte Reload
+; BE-I64-NEON-NEXT: mov r4, r0
+; BE-I64-NEON-NEXT: vmov r0, s0
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov s0, r4
+; BE-I64-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vldr s0, [sp, #64] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: vmov r2, s0
+; BE-I64-NEON-NEXT: vldr s0, [sp, #80] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT: vmov r4, s0
+; BE-I64-NEON-NEXT: vldr s0, [sp, #76] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vstr d16, [sp, #80] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vmov r5, s0
+; BE-I64-NEON-NEXT: mov r0, r2
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: mov r0, r4
+; BE-I64-NEON-NEXT: mov r9, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov s0, r0
+; BE-I64-NEON-NEXT: vmov.32 d8[0], r7
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: mov r0, r5
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vldr s0, [sp, #88] @ 4-byte Reload
+; BE-I64-NEON-NEXT: mov r4, r0
+; BE-I64-NEON-NEXT: vmov.32 d8[1], r8
+; BE-I64-NEON-NEXT: vmov r7, s0
+; BE-I64-NEON-NEXT: vldr s0, [sp, #96] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vstr d8, [sp, #88] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vmov r0, s0
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vmov s19, r0
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r6
+; BE-I64-NEON-NEXT: vmov r5, s0
+; BE-I64-NEON-NEXT: vldr s0, [sp, #112] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vstr d12, [sp, #104] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vmov r0, s0
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov.f32 s0, s19
+; BE-I64-NEON-NEXT: vmov s30, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s30
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT: vmov s17, r4
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: mov r0, r5
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov.f32 s0, s17
+; BE-I64-NEON-NEXT: vmov s30, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s30
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r4
+; BE-I64-NEON-NEXT: vstr d16, [sp, #64] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d12, [sp, #112] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT: mov r0, r7
+; BE-I64-NEON-NEXT: mov r8, r1
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r6
+; BE-I64-NEON-NEXT: vstr d16, [sp, #56] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vstr d9, [sp, #96] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vldr s0, [sp, #120] @ 4-byte Reload
+; BE-I64-NEON-NEXT: mov r5, r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vmov r7, s0
+; BE-I64-NEON-NEXT: vldr s0, [sp, #132] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r0
+; BE-I64-NEON-NEXT: vmov r0, s0
+; BE-I64-NEON-NEXT: vstr d10, [sp, #120] @ 8-byte Spill
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vldr s0, [sp, #136] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vmov s26, r0
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r9
+; BE-I64-NEON-NEXT: vmov r4, s0
+; BE-I64-NEON-NEXT: vldr s0, [sp, #144] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vstr d11, [sp, #136] @ 8-byte Spill
+; BE-I64-NEON-NEXT: vmov r0, s0
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov.f32 s0, s26
+; BE-I64-NEON-NEXT: vmov s22, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s22
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT: vmov s24, r5
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT: mov r0, r4
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov.f32 s0, s24
+; BE-I64-NEON-NEXT: vmov s22, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s22
+; BE-I64-NEON-NEXT: mov r9, r1
+; BE-I64-NEON-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d14[1], r5
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT: mov r0, r7
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vldr s0, [sp, #148] @ 4-byte Reload
+; BE-I64-NEON-NEXT: mov r7, r0
+; BE-I64-NEON-NEXT: vmov.32 d13[1], r6
+; BE-I64-NEON-NEXT: vmov r0, s0
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vldr s0, [sp, #152] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vmov s20, r0
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r5
+; BE-I64-NEON-NEXT: vmov r4, s0
+; BE-I64-NEON-NEXT: vldr s0, [sp, #156] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vmov r0, s0
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov.f32 s0, s20
+; BE-I64-NEON-NEXT: vmov s16, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: mov r6, r1
+; BE-I64-NEON-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT: vmov s18, r7
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT: mov r0, r4
+; BE-I64-NEON-NEXT: mov r5, r1
+; BE-I64-NEON-NEXT: bl __aeabi_h2f
+; BE-I64-NEON-NEXT: vmov.f32 s0, s18
+; BE-I64-NEON-NEXT: vmov s16, r0
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vmov.f32 s0, s16
+; BE-I64-NEON-NEXT: mov r4, r1
+; BE-I64-NEON-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT: vmov.32 d15[1], r5
+; BE-I64-NEON-NEXT: bl lrintf
+; BE-I64-NEON-NEXT: vldr d16, [sp, #160] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vldr d20, [sp, #136] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d19, d14
+; BE-I64-NEON-NEXT: vrev64.32 d31, d16
+; BE-I64-NEON-NEXT: vldr d16, [sp, #168] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d18, d20
+; BE-I64-NEON-NEXT: vldr d20, [sp, #120] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vldr d22, [sp, #96] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d28[0], r0
+; BE-I64-NEON-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d21, d20
+; BE-I64-NEON-NEXT: vrev64.32 d30, d16
+; BE-I64-NEON-NEXT: vldr d16, [sp, #48] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vldr d23, [sp, #64] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d20, d22
+; BE-I64-NEON-NEXT: vldr d22, [sp, #112] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d1, d16
+; BE-I64-NEON-NEXT: vldr d16, [sp, #80] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d23[1], r0
+; BE-I64-NEON-NEXT: add r0, r10, #64
+; BE-I64-NEON-NEXT: vrev64.32 d25, d22
+; BE-I64-NEON-NEXT: vldr d22, [sp, #104] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEON-NEXT: vrev64.32 d0, d16
+; BE-I64-NEON-NEXT: vmov.32 d28[1], r1
+; BE-I64-NEON-NEXT: vldr d29, [sp, #56] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vrev64.32 d3, d15
+; BE-I64-NEON-NEXT: vrev64.32 d24, d22
+; BE-I64-NEON-NEXT: vldr d22, [sp, #88] @ 8-byte Reload
+; BE-I64-NEON-NEXT: vmov.32 d10[1], r6
+; BE-I64-NEON-NEXT: vrev64.32 d5, d23
+; BE-I64-NEON-NEXT: vst1.64 {d0, d1}, [r11:128]!
+; BE-I64-NEON-NEXT: vrev64.32 d2, d9
+; BE-I64-NEON-NEXT: vrev64.32 d27, d22
+; BE-I64-NEON-NEXT: vmov.32 d29[1], r8
+; BE-I64-NEON-NEXT: vrev64.32 d4, d28
+; BE-I64-NEON-NEXT: vst1.64 {d30, d31}, [r11:128]
+; BE-I64-NEON-NEXT: vst1.64 {d2, d3}, [r0:128]!
+; BE-I64-NEON-NEXT: vmov.32 d12[1], r9
+; BE-I64-NEON-NEXT: vrev64.32 d26, d10
+; BE-I64-NEON-NEXT: vst1.64 {d4, d5}, [r0:128]!
+; BE-I64-NEON-NEXT: vrev64.32 d23, d29
+; BE-I64-NEON-NEXT: vst1.64 {d26, d27}, [r0:128]!
+; BE-I64-NEON-NEXT: vrev64.32 d22, d12
+; BE-I64-NEON-NEXT: vst1.64 {d24, d25}, [r0:128]
+; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r10:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d22, d23}, [r10:128]!
+; BE-I64-NEON-NEXT: vrev64.32 d17, d11
+; BE-I64-NEON-NEXT: vrev64.32 d16, d13
+; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r10:128]!
+; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]
+; BE-I64-NEON-NEXT: add sp, sp, #176
+; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT: add sp, sp, #4
+; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half> %x)
+ ret <32 x iXLen> %a
+}
+declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half>)
define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
; LE-I32-LABEL: lrint_v1f32:
diff --git a/llvm/test/CodeGen/LoongArch/lrint-conv.ll b/llvm/test/CodeGen/LoongArch/lrint-conv.ll
index 85de820025614..262d1c16a6486 100644
--- a/llvm/test/CodeGen/LoongArch/lrint-conv.ll
+++ b/llvm/test/CodeGen/LoongArch/lrint-conv.ll
@@ -5,16 +5,31 @@
; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=loongarch64 | FileCheck %s --check-prefixes=LA64-I32
; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=loongarch64 | FileCheck %s --check-prefixes=LA64-I64
-; FIXME: crash
-; define ITy @test_lrint_ixx_f16(half %x) nounwind {
-; %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
-; ret ITy %res
-; }
+define ITy @test_lrint_ixx_f16(half %x) nounwind {
+; LA32-LABEL: test_lrint_ixx_f16:
+; LA32: bl lrintf
+;
+; LA64-I32-LABEL: test_lrint_ixx_f16:
+; LA64-I32: pcaddu18i $ra, %call36(lrintf)
+;
+; LA64-I64-LABEL: test_lrint_ixx_f16:
+; LA64-I64: pcaddu18i $t8, %call36(lrintf)
+ %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
+ ret ITy %res
+}
-; define ITy @test_llrint_ixx_f16(half %x) nounwind {
-; %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
-; ret ITy %res
-; }
+define ITy @test_llrint_ixx_f16(half %x) nounwind {
+; LA32-LABEL: test_llrint_ixx_f16:
+; LA32: bl llrintf
+;
+; LA64-I32-LABEL: test_llrint_ixx_f16:
+; LA64-I32: pcaddu18i $ra, %call36(llrintf)
+;
+; LA64-I64-LABEL: test_llrint_ixx_f16:
+; LA64-I64: pcaddu18i $t8, %call36(llrintf)
+ %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
+ ret ITy %res
+}
define ITy @test_lrint_ixx_f32(float %x) nounwind {
; LA32-LABEL: test_lrint_ixx_f32:
diff --git a/llvm/test/CodeGen/Mips/llrint-conv.ll b/llvm/test/CodeGen/Mips/llrint-conv.ll
index ee3c0d99253a6..cba7aba21d9c2 100644
--- a/llvm/test/CodeGen/Mips/llrint-conv.ll
+++ b/llvm/test/CodeGen/Mips/llrint-conv.ll
@@ -1,19 +1,22 @@
; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s
; RUN: llc < %s -mtriple=mips -mattr=+soft-float | FileCheck %s
-; FIXME: crash
-; define signext i32 @testmswh(half %x) {
-; entry:
-; %0 = tail call i64 @llvm.llrint.f16(half %x)
-; %conv = trunc i64 %0 to i32
-; ret i32 %conv
-; }
+define signext i32 @testmswh(half %x) {
+; CHECK-LABEL: testmswh:
+; CHECK: jal llrintf
+entry:
+ %0 = tail call i64 @llvm.llrint.f16(half %x)
+ %conv = trunc i64 %0 to i32
+ ret i32 %conv
+}
-; define i64 @testmsxh(half %x) {
-; entry:
-; %0 = tail call i64 @llvm.llrint.f16(half %x)
-; ret i64 %0
-; }
+define i64 @testmsxh(half %x) {
+; CHECK-LABEL: testmsxh:
+; CHECK: jal llrintf
+entry:
+ %0 = tail call i64 @llvm.llrint.f16(half %x)
+ ret i64 %0
+}
define signext i32 @testmsws(float %x) {
; CHECK-LABEL: testmsws:
diff --git a/llvm/test/CodeGen/Mips/lrint-conv.ll b/llvm/test/CodeGen/Mips/lrint-conv.ll
index 6d2e392675f1c..64c5cb9ac5b07 100644
--- a/llvm/test/CodeGen/Mips/lrint-conv.ll
+++ b/llvm/test/CodeGen/Mips/lrint-conv.ll
@@ -1,19 +1,22 @@
; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s
; RUN: llc < %s -mtriple=mips -mattr=+soft-float | FileCheck %s
-; FIXME: crash
-; define signext i32 @testmswh(half %x) {
-; entry:
-; %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
-; %conv = trunc i64 %0 to i32
-; ret i32 %conv
-; }
+define signext i32 @testmswh(half %x) {
+; CHECK-LABEL: testmswh:
+; CHECK: jal lrintf
+entry:
+ %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
+ %conv = trunc i64 %0 to i32
+ ret i32 %conv
+}
-; define i64 @testmsxh(half %x) {
-; entry:
-; %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
-; ret i64 %0
-; }
+define i64 @testmsxh(half %x) {
+; CHECK-LABEL: testmsxh:
+; CHECK: jal lrintf
+entry:
+ %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
+ ret i64 %0
+}
define signext i32 @testmsws(float %x) {
; CHECK-LABEL: testmsws:
diff --git a/llvm/test/CodeGen/RISCV/lrint-conv.ll b/llvm/test/CodeGen/RISCV/lrint-conv.ll
index d3af2153588a1..ecb6bd0932ef3 100644
--- a/llvm/test/CodeGen/RISCV/lrint-conv.ll
+++ b/llvm/test/CodeGen/RISCV/lrint-conv.ll
@@ -5,14 +5,25 @@
; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64
; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64
-; FIXME: crash
-; define ITy @test_lrint_ixx_f16(half %x) nounwind {
-; %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
-; }
+define ITy @test_lrint_ixx_f16(half %x) nounwind {
+; RV32-LABEL: test_lrint_ixx_f16:
+; RV32: call lrintf
+;
+; RV64-LABEL: test_lrint_ixx_f16:
+; RV64: call lrintf
+ %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
+ ret ITy %res
+}
-; define ITy @test_llrint_ixx_f16(half %x) nounwind {
-; %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
-; }
+define ITy @test_llrint_ixx_f16(half %x) nounwind {
+; RV32-LABEL: test_llrint_ixx_f16:
+; RV32: call llrintf
+;
+; RV64-LABEL: test_llrint_ixx_f16:
+; RV64: call llrintf
+ %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
+ ret ITy %res
+}
define ITy @test_lrint_ixx_f32(float %x) nounwind {
; RV32-LABEL: test_lrint_ixx_f32:
diff --git a/llvm/test/CodeGen/X86/lrint-conv-i32.ll b/llvm/test/CodeGen/X86/lrint-conv-i32.ll
index 3c50aea1095f4..5c0a64f1477e6 100644
--- a/llvm/test/CodeGen/X86/lrint-conv-i32.ll
+++ b/llvm/test/CodeGen/X86/lrint-conv-i32.ll
@@ -7,12 +7,52 @@
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX
-; FIXME: crash
-; define i32 @testmswh(half %x) nounwind {
-; entry:
-; %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
-; ret i32 %0
-; }
+define i32 @testmswh(half %x) nounwind {
+; X86-NOSSE-LABEL: testmswh:
+; X86-NOSSE: # %bb.0: # %entry
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: calll __extendhfsf2
+; X86-NOSSE-NEXT: addl $4, %esp
+; X86-NOSSE-NEXT: fistpl (%esp)
+; X86-NOSSE-NEXT: movl (%esp), %eax
+; X86-NOSSE-NEXT: popl %ecx
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: testmswh:
+; X86-SSE2: # %bb.0: # %entry
+; X86-SSE2-NEXT: subl $8, %esp
+; X86-SSE2-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
+; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
+; X86-SSE2-NEXT: movw %ax, (%esp)
+; X86-SSE2-NEXT: calll __extendhfsf2
+; X86-SSE2-NEXT: fstps (%esp)
+; X86-SSE2-NEXT: calll rintf
+; X86-SSE2-NEXT: fstps (%esp)
+; X86-SSE2-NEXT: calll __truncsfhf2
+; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
+; X86-SSE2-NEXT: movw %ax, (%esp)
+; X86-SSE2-NEXT: calll __extendhfsf2
+; X86-SSE2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: cvttss2si {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: addl $8, %esp
+; X86-SSE2-NEXT: retl
+;
+; X64-SSE-LABEL: testmswh:
+; X64-SSE: # %bb.0: # %entry
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: callq __extendhfsf2@PLT
+; X64-SSE-NEXT: callq rintf@PLT
+; X64-SSE-NEXT: callq __truncsfhf2@PLT
+; X64-SSE-NEXT: callq __extendhfsf2@PLT
+; X64-SSE-NEXT: cvttss2si %xmm0, %eax
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+entry:
+ %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+ ret i32 %0
+}
define i32 @testmsws(float %x) nounwind {
; X86-NOSSE-LABEL: testmsws:
>From b869a397c456b9467bb3c7d49d3b7417d16ed377 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Wed, 6 Aug 2025 07:04:23 +0000
Subject: [PATCH 3/4] [PowerPC] Extend and update the test for `half` support
(NFC)
`f16` is more functional than just a storage type on the platform,
though it does have some codegen issues [1]. To prepare for future
changes, do the following nonfunctional updates to the existing `half`
test:
* Add tests for passing and returning the type directly.
* Add tests showing bitcast behavior, which is currently incorrect but
serves as a baseline.
* Add tests for `fabs` and `copysign` (trivial operations that shouldn't
require libcalls).
* Add invocations for big-endian and for PPC32.
* Rename the test to `half.ll` to reflect its status, which also matches
other backends.
[1]: https://github.com/llvm/llvm-project/issues/97975
---
llvm/test/CodeGen/PowerPC/half.ll | 2562 +++++++++++++++++
.../PowerPC/handle-f16-storage-type.ll | 1281 ---------
2 files changed, 2562 insertions(+), 1281 deletions(-)
create mode 100644 llvm/test/CodeGen/PowerPC/half.ll
delete mode 100644 llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
diff --git a/llvm/test/CodeGen/PowerPC/half.ll b/llvm/test/CodeGen/PowerPC/half.ll
new file mode 100644
index 0000000000000..fe0dccf63af80
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/half.ll
@@ -0,0 +1,2562 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc-unknown-unknown \
+; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN: --check-prefix=PPC32
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
+; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN: --check-prefix=P8
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
+; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -mattr=-hard-float \
+; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN: --check-prefix=SOFT
+; RUN: llc -mtriple=powerpc64-unknown-unknown \
+; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN: --check-prefix=BE
+
+; Tests for various operations on half precision float. Much of the test is
+; copied from test/CodeGen/X86/half.ll.
+
+define void @store(half %x, ptr %p) nounwind {
+; PPC32-LABEL: store:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT: mr r30, r3
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: sth r3, 0(r30)
+; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: store:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; P8-NEXT: stdu r1, -48(r1)
+; P8-NEXT: std r0, 64(r1)
+; P8-NEXT: mr r30, r4
+; P8-NEXT: bl __truncsfhf2
+; P8-NEXT: nop
+; P8-NEXT: sth r3, 0(r30)
+; P8-NEXT: addi r1, r1, 48
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: store:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: stxsihx f0, 0, r4
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: store:
+; SOFT: # %bb.0:
+; SOFT-NEXT: sth r3, 0(r4)
+; SOFT-NEXT: blr
+;
+; BE-LABEL: store:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -128(r1)
+; BE-NEXT: std r0, 144(r1)
+; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r4
+; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: nop
+; BE-NEXT: sth r3, 0(r30)
+; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 128
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ store half %x, ptr %p
+ ret void
+}
+
+define half @return(ptr %p) nounwind {
+; PPC32-LABEL: return:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: lhz r3, 0(r3)
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: return:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: stdu r1, -32(r1)
+; P8-NEXT: std r0, 48(r1)
+; P8-NEXT: lhz r3, 0(r3)
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: addi r1, r1, 32
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: return:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxsihzx f0, 0, r3
+; CHECK-NEXT: xscvhpdp f1, f0
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: return:
+; SOFT: # %bb.0:
+; SOFT-NEXT: lhz r3, 0(r3)
+; SOFT-NEXT: blr
+;
+; BE-LABEL: return:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: lhz r3, 0(r3)
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %r = load half, ptr %p
+ ret half %r
+}
+
+define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr nounwind {
+; PPC32-LABEL: loadd:
+; PPC32: # %bb.0: # %entry
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: lhz r3, 2(r3)
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: loadd:
+; P8: # %bb.0: # %entry
+; P8-NEXT: mflr r0
+; P8-NEXT: stdu r1, -32(r1)
+; P8-NEXT: std r0, 48(r1)
+; P8-NEXT: lhz r3, 2(r3)
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: addi r1, r1, 32
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: loadd:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi r3, r3, 2
+; CHECK-NEXT: lxsihzx f0, 0, r3
+; CHECK-NEXT: xscvhpdp f1, f0
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: loadd:
+; SOFT: # %bb.0: # %entry
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: stdu r1, -32(r1)
+; SOFT-NEXT: std r0, 48(r1)
+; SOFT-NEXT: lhz r3, 2(r3)
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: bl __extendsfdf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: addi r1, r1, 32
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: blr
+;
+; BE-LABEL: loadd:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: lhz r3, 2(r3)
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+entry:
+ %arrayidx = getelementptr inbounds i16, ptr %a, i64 1
+ %0 = load i16, ptr %arrayidx, align 2
+ %1 = tail call double @llvm.convert.from.fp16.f64(i16 %0)
+ ret double %1
+}
+
+declare double @llvm.convert.from.fp16.f64(i16)
+
+define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr nounwind {
+; PPC32-LABEL: loadf:
+; PPC32: # %bb.0: # %entry
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: lhz r3, 2(r3)
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: loadf:
+; P8: # %bb.0: # %entry
+; P8-NEXT: mflr r0
+; P8-NEXT: stdu r1, -32(r1)
+; P8-NEXT: std r0, 48(r1)
+; P8-NEXT: lhz r3, 2(r3)
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: addi r1, r1, 32
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: loadf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi r3, r3, 2
+; CHECK-NEXT: lxsihzx f0, 0, r3
+; CHECK-NEXT: xscvhpdp f1, f0
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: loadf:
+; SOFT: # %bb.0: # %entry
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: stdu r1, -32(r1)
+; SOFT-NEXT: std r0, 48(r1)
+; SOFT-NEXT: lhz r3, 2(r3)
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: addi r1, r1, 32
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: blr
+;
+; BE-LABEL: loadf:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: lhz r3, 2(r3)
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+entry:
+ %arrayidx = getelementptr inbounds i16, ptr %a, i64 1
+ %0 = load i16, ptr %arrayidx, align 2
+ %1 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
+ ret float %1
+}
+
+declare float @llvm.convert.from.fp16.f32(i16)
+
+define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr nounwind {
+; PPC32-LABEL: stored:
+; PPC32: # %bb.0: # %entry
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT: mr r30, r3
+; PPC32-NEXT: bl __truncdfhf2
+; PPC32-NEXT: sth r3, 0(r30)
+; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: stored:
+; P8: # %bb.0: # %entry
+; P8-NEXT: mflr r0
+; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; P8-NEXT: stdu r1, -48(r1)
+; P8-NEXT: std r0, 64(r1)
+; P8-NEXT: mr r30, r3
+; P8-NEXT: bl __truncdfhf2
+; P8-NEXT: nop
+; P8-NEXT: sth r3, 0(r30)
+; P8-NEXT: addi r1, r1, 48
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: stored:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: stxsihx f0, 0, r3
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: stored:
+; SOFT: # %bb.0: # %entry
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT: stdu r1, -48(r1)
+; SOFT-NEXT: mr r30, r3
+; SOFT-NEXT: mr r3, r4
+; SOFT-NEXT: std r0, 64(r1)
+; SOFT-NEXT: bl __truncdfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: clrldi r3, r3, 48
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: sth r3, 0(r30)
+; SOFT-NEXT: addi r1, r1, 48
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: blr
+;
+; BE-LABEL: stored:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -128(r1)
+; BE-NEXT: std r0, 144(r1)
+; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r3
+; BE-NEXT: bl __truncdfhf2
+; BE-NEXT: nop
+; BE-NEXT: sth r3, 0(r30)
+; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 128
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+entry:
+ %0 = tail call i16 @llvm.convert.to.fp16.f64(double %b)
+ store i16 %0, ptr %a, align 2
+ ret void
+}
+
+declare i16 @llvm.convert.to.fp16.f64(double)
+
+define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr nounwind {
+; PPC32-LABEL: storef:
+; PPC32: # %bb.0: # %entry
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT: mr r30, r3
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: sth r3, 0(r30)
+; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: storef:
+; P8: # %bb.0: # %entry
+; P8-NEXT: mflr r0
+; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; P8-NEXT: stdu r1, -48(r1)
+; P8-NEXT: std r0, 64(r1)
+; P8-NEXT: mr r30, r3
+; P8-NEXT: bl __truncsfhf2
+; P8-NEXT: nop
+; P8-NEXT: sth r3, 0(r30)
+; P8-NEXT: addi r1, r1, 48
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: storef:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: stxsihx f0, 0, r3
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: storef:
+; SOFT: # %bb.0: # %entry
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT: stdu r1, -48(r1)
+; SOFT-NEXT: mr r30, r3
+; SOFT-NEXT: clrldi r3, r4, 32
+; SOFT-NEXT: std r0, 64(r1)
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: clrldi r3, r3, 48
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: sth r3, 0(r30)
+; SOFT-NEXT: addi r1, r1, 48
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: blr
+;
+; BE-LABEL: storef:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -128(r1)
+; BE-NEXT: std r0, 144(r1)
+; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r3
+; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: nop
+; BE-NEXT: sth r3, 0(r30)
+; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 128
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+entry:
+ %0 = tail call i16 @llvm.convert.to.fp16.f32(float %b)
+ store i16 %0, ptr %a, align 2
+ ret void
+}
+
+declare i16 @llvm.convert.to.fp16.f32(float)
+define void @test_load_store(ptr %in, ptr %out) nounwind {
+; PPC32-LABEL: test_load_store:
+; PPC32: # %bb.0:
+; PPC32-NEXT: lhz r3, 0(r3)
+; PPC32-NEXT: sth r3, 0(r4)
+; PPC32-NEXT: blr
+;
+; P8-LABEL: test_load_store:
+; P8: # %bb.0:
+; P8-NEXT: lhz r3, 0(r3)
+; P8-NEXT: sth r3, 0(r4)
+; P8-NEXT: blr
+;
+; CHECK-LABEL: test_load_store:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lhz r3, 0(r3)
+; CHECK-NEXT: sth r3, 0(r4)
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: test_load_store:
+; SOFT: # %bb.0:
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT: stdu r1, -48(r1)
+; SOFT-NEXT: std r0, 64(r1)
+; SOFT-NEXT: mr r30, r4
+; SOFT-NEXT: lhz r3, 0(r3)
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: sth r3, 0(r30)
+; SOFT-NEXT: addi r1, r1, 48
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: blr
+;
+; BE-LABEL: test_load_store:
+; BE: # %bb.0:
+; BE-NEXT: lhz r3, 0(r3)
+; BE-NEXT: sth r3, 0(r4)
+; BE-NEXT: blr
+ %val = load half, ptr %in
+ store half %val, ptr %out
+ ret void
+}
+define i16 @test_bitcast_from_half(ptr %addr) nounwind {
+; PPC32-LABEL: test_bitcast_from_half:
+; PPC32: # %bb.0:
+; PPC32-NEXT: lhz r3, 0(r3)
+; PPC32-NEXT: blr
+;
+; P8-LABEL: test_bitcast_from_half:
+; P8: # %bb.0:
+; P8-NEXT: lhz r3, 0(r3)
+; P8-NEXT: blr
+;
+; CHECK-LABEL: test_bitcast_from_half:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lhz r3, 0(r3)
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: test_bitcast_from_half:
+; SOFT: # %bb.0:
+; SOFT-NEXT: lhz r3, 0(r3)
+; SOFT-NEXT: blr
+;
+; BE-LABEL: test_bitcast_from_half:
+; BE: # %bb.0:
+; BE-NEXT: lhz r3, 0(r3)
+; BE-NEXT: blr
+ %val = load half, ptr %addr
+ %val_int = bitcast half %val to i16
+ ret i16 %val_int
+}
+define void @test_bitcast_to_half(ptr %addr, i16 %in) nounwind {
+; PPC32-LABEL: test_bitcast_to_half:
+; PPC32: # %bb.0:
+; PPC32-NEXT: sth r4, 0(r3)
+; PPC32-NEXT: blr
+;
+; P8-LABEL: test_bitcast_to_half:
+; P8: # %bb.0:
+; P8-NEXT: sth r4, 0(r3)
+; P8-NEXT: blr
+;
+; CHECK-LABEL: test_bitcast_to_half:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sth r4, 0(r3)
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: test_bitcast_to_half:
+; SOFT: # %bb.0:
+; SOFT-NEXT: sth r4, 0(r3)
+; SOFT-NEXT: blr
+;
+; BE-LABEL: test_bitcast_to_half:
+; BE: # %bb.0:
+; BE-NEXT: sth r4, 0(r3)
+; BE-NEXT: blr
+ %val_fp = bitcast i16 %in to half
+ store half %val_fp, ptr %addr
+ ret void
+}
+
+
+; Checks for https://github.com/llvm/llvm-project/issues/97981
+define half @from_bits(i16 %x) nounwind {
+; PPC32-LABEL: from_bits:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: clrlwi r3, r3, 16
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: from_bits:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: stdu r1, -32(r1)
+; P8-NEXT: clrldi r3, r3, 48
+; P8-NEXT: std r0, 48(r1)
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: addi r1, r1, 32
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: from_bits:
+; CHECK: # %bb.0:
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: xscvhpdp f1, f0
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: from_bits:
+; SOFT: # %bb.0:
+; SOFT-NEXT: blr
+;
+; BE-LABEL: from_bits:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %res = bitcast i16 %x to half
+ ret half %res
+}
+
+define i16 @to_bits(half %x) nounwind {
+; PPC32-LABEL: to_bits:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: clrlwi r3, r3, 16
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: to_bits:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: stdu r1, -32(r1)
+; P8-NEXT: std r0, 48(r1)
+; P8-NEXT: bl __truncsfhf2
+; P8-NEXT: nop
+; P8-NEXT: clrldi r3, r3, 48
+; P8-NEXT: addi r1, r1, 32
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: to_bits:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: to_bits:
+; SOFT: # %bb.0:
+; SOFT-NEXT: blr
+;
+; BE-LABEL: to_bits:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: nop
+; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %res = bitcast half %x to i16
+ ret i16 %res
+}
+
+define float @test_extend32(ptr %addr) nounwind {
+; PPC32-LABEL: test_extend32:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: lhz r3, 0(r3)
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: test_extend32:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: stdu r1, -32(r1)
+; P8-NEXT: std r0, 48(r1)
+; P8-NEXT: lhz r3, 0(r3)
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: addi r1, r1, 32
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: test_extend32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxsihzx f0, 0, r3
+; CHECK-NEXT: xscvhpdp f1, f0
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: test_extend32:
+; SOFT: # %bb.0:
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: stdu r1, -32(r1)
+; SOFT-NEXT: std r0, 48(r1)
+; SOFT-NEXT: lhz r3, 0(r3)
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: addi r1, r1, 32
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: blr
+;
+; BE-LABEL: test_extend32:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: lhz r3, 0(r3)
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %val16 = load half, ptr %addr
+ %val32 = fpext half %val16 to float
+ ret float %val32
+}
+define double @test_extend64(ptr %addr) nounwind {
+; PPC32-LABEL: test_extend64:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: lhz r3, 0(r3)
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: test_extend64:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: stdu r1, -32(r1)
+; P8-NEXT: std r0, 48(r1)
+; P8-NEXT: lhz r3, 0(r3)
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: addi r1, r1, 32
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: test_extend64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxsihzx f0, 0, r3
+; CHECK-NEXT: xscvhpdp f1, f0
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: test_extend64:
+; SOFT: # %bb.0:
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: stdu r1, -32(r1)
+; SOFT-NEXT: std r0, 48(r1)
+; SOFT-NEXT: lhz r3, 0(r3)
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: bl __extendsfdf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: addi r1, r1, 32
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: blr
+;
+; BE-LABEL: test_extend64:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: lhz r3, 0(r3)
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %val16 = load half, ptr %addr
+ %val32 = fpext half %val16 to double
+ ret double %val32
+}
+define void @test_trunc32(float %in, ptr %addr) nounwind {
+; PPC32-LABEL: test_trunc32:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT: mr r30, r3
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: sth r3, 0(r30)
+; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: test_trunc32:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; P8-NEXT: stdu r1, -48(r1)
+; P8-NEXT: std r0, 64(r1)
+; P8-NEXT: mr r30, r4
+; P8-NEXT: bl __truncsfhf2
+; P8-NEXT: nop
+; P8-NEXT: sth r3, 0(r30)
+; P8-NEXT: addi r1, r1, 48
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: test_trunc32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: stxsihx f0, 0, r4
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: test_trunc32:
+; SOFT: # %bb.0:
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT: stdu r1, -48(r1)
+; SOFT-NEXT: clrldi r3, r3, 32
+; SOFT-NEXT: std r0, 64(r1)
+; SOFT-NEXT: mr r30, r4
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: clrldi r3, r3, 48
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: sth r3, 0(r30)
+; SOFT-NEXT: addi r1, r1, 48
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: blr
+;
+; BE-LABEL: test_trunc32:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -128(r1)
+; BE-NEXT: std r0, 144(r1)
+; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r4
+; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: nop
+; BE-NEXT: sth r3, 0(r30)
+; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 128
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %val16 = fptrunc float %in to half
+ store half %val16, ptr %addr
+ ret void
+}
+define void @test_trunc64(double %in, ptr %addr) nounwind {
+; PPC32-LABEL: test_trunc64:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT: mr r30, r3
+; PPC32-NEXT: bl __truncdfhf2
+; PPC32-NEXT: sth r3, 0(r30)
+; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: test_trunc64:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; P8-NEXT: stdu r1, -48(r1)
+; P8-NEXT: std r0, 64(r1)
+; P8-NEXT: mr r30, r4
+; P8-NEXT: bl __truncdfhf2
+; P8-NEXT: nop
+; P8-NEXT: sth r3, 0(r30)
+; P8-NEXT: addi r1, r1, 48
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: test_trunc64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: stxsihx f0, 0, r4
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: test_trunc64:
+; SOFT: # %bb.0:
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT: stdu r1, -48(r1)
+; SOFT-NEXT: std r0, 64(r1)
+; SOFT-NEXT: mr r30, r4
+; SOFT-NEXT: bl __truncdfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: clrldi r3, r3, 48
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: sth r3, 0(r30)
+; SOFT-NEXT: addi r1, r1, 48
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: blr
+;
+; BE-LABEL: test_trunc64:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -128(r1)
+; BE-NEXT: std r0, 144(r1)
+; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r4
+; BE-NEXT: bl __truncdfhf2
+; BE-NEXT: nop
+; BE-NEXT: sth r3, 0(r30)
+; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 128
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %val16 = fptrunc double %in to half
+ store half %val16, ptr %addr
+ ret void
+}
+define i64 @test_fptosi_i64(ptr %p) nounwind {
+; PPC32-LABEL: test_fptosi_i64:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: lhz r3, 0(r3)
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: bl __fixsfdi
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: test_fptosi_i64:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: stdu r1, -32(r1)
+; P8-NEXT: std r0, 48(r1)
+; P8-NEXT: lhz r3, 0(r3)
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: xscvdpsxds f0, f1
+; P8-NEXT: mffprd r3, f0
+; P8-NEXT: addi r1, r1, 32
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: test_fptosi_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lhz r3, 0(r3)
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: xscvhpdp f0, f0
+; CHECK-NEXT: xscvdpsxds f0, f0
+; CHECK-NEXT: mffprd r3, f0
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: test_fptosi_i64:
+; SOFT: # %bb.0:
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: stdu r1, -32(r1)
+; SOFT-NEXT: std r0, 48(r1)
+; SOFT-NEXT: lhz r3, 0(r3)
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: bl __fixsfdi
+; SOFT-NEXT: nop
+; SOFT-NEXT: addi r1, r1, 32
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: blr
+;
+; BE-LABEL: test_fptosi_i64:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -128(r1)
+; BE-NEXT: std r0, 144(r1)
+; BE-NEXT: lhz r3, 0(r3)
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: fctidz f0, f1
+; BE-NEXT: stfd f0, 120(r1)
+; BE-NEXT: ld r3, 120(r1)
+; BE-NEXT: addi r1, r1, 128
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %a = load half, ptr %p, align 2
+ %r = fptosi half %a to i64
+ ret i64 %r
+}
+define void @test_sitofp_i64(i64 %a, ptr %p) nounwind {
+; PPC32-LABEL: test_sitofp_i64:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT: mr r30, r5
+; PPC32-NEXT: bl __floatdisf
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: sth r3, 0(r30)
+; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: test_sitofp_i64:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; P8-NEXT: stdu r1, -48(r1)
+; P8-NEXT: mtfprd f0, r3
+; P8-NEXT: std r0, 64(r1)
+; P8-NEXT: mr r30, r4
+; P8-NEXT: xscvsxdsp f1, f0
+; P8-NEXT: bl __truncsfhf2
+; P8-NEXT: nop
+; P8-NEXT: sth r3, 0(r30)
+; P8-NEXT: addi r1, r1, 48
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: test_sitofp_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: xscvsxdsp f0, f0
+; CHECK-NEXT: xscvdphp f0, f0
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: sth r3, 0(r4)
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: test_sitofp_i64:
+; SOFT: # %bb.0:
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT: stdu r1, -48(r1)
+; SOFT-NEXT: std r0, 64(r1)
+; SOFT-NEXT: mr r30, r4
+; SOFT-NEXT: bl __floatdisf
+; SOFT-NEXT: nop
+; SOFT-NEXT: clrldi r3, r3, 32
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: clrldi r3, r3, 48
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: sth r3, 0(r30)
+; SOFT-NEXT: addi r1, r1, 48
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: blr
+;
+; BE-LABEL: test_sitofp_i64:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -144(r1)
+; BE-NEXT: sradi r5, r3, 53
+; BE-NEXT: std r0, 160(r1)
+; BE-NEXT: addi r5, r5, 1
+; BE-NEXT: cmpldi r5, 1
+; BE-NEXT: std r30, 128(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r4
+; BE-NEXT: ble cr0, .LBB16_2
+; BE-NEXT: # %bb.1:
+; BE-NEXT: clrldi r4, r3, 53
+; BE-NEXT: addi r4, r4, 2047
+; BE-NEXT: or r3, r4, r3
+; BE-NEXT: rldicr r3, r3, 0, 52
+; BE-NEXT: .LBB16_2:
+; BE-NEXT: std r3, 120(r1)
+; BE-NEXT: lfd f0, 120(r1)
+; BE-NEXT: fcfid f0, f0
+; BE-NEXT: frsp f1, f0
+; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: nop
+; BE-NEXT: sth r3, 0(r30)
+; BE-NEXT: ld r30, 128(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 144
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %r = sitofp i64 %a to half
+ store half %r, ptr %p
+ ret void
+}
+define i64 @test_fptoui_i64(ptr %p) nounwind {
+; PPC32-LABEL: test_fptoui_i64:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: lhz r3, 0(r3)
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: bl __fixunssfdi
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: test_fptoui_i64:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: stdu r1, -32(r1)
+; P8-NEXT: std r0, 48(r1)
+; P8-NEXT: lhz r3, 0(r3)
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: xscvdpuxds f0, f1
+; P8-NEXT: mffprd r3, f0
+; P8-NEXT: addi r1, r1, 32
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: test_fptoui_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lhz r3, 0(r3)
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: xscvhpdp f0, f0
+; CHECK-NEXT: xscvdpuxds f0, f0
+; CHECK-NEXT: mffprd r3, f0
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: test_fptoui_i64:
+; SOFT: # %bb.0:
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: stdu r1, -32(r1)
+; SOFT-NEXT: std r0, 48(r1)
+; SOFT-NEXT: lhz r3, 0(r3)
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: bl __fixunssfdi
+; SOFT-NEXT: nop
+; SOFT-NEXT: addi r1, r1, 32
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: blr
+;
+; BE-LABEL: test_fptoui_i64:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -128(r1)
+; BE-NEXT: std r0, 144(r1)
+; BE-NEXT: lhz r3, 0(r3)
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: addis r3, r2, .LCPI17_0 at toc@ha
+; BE-NEXT: lfs f0, .LCPI17_0 at toc@l(r3)
+; BE-NEXT: fsubs f2, f1, f0
+; BE-NEXT: fcmpu cr0, f1, f0
+; BE-NEXT: fctidz f2, f2
+; BE-NEXT: stfd f2, 120(r1)
+; BE-NEXT: fctidz f2, f1
+; BE-NEXT: stfd f2, 112(r1)
+; BE-NEXT: blt cr0, .LBB17_2
+; BE-NEXT: # %bb.1:
+; BE-NEXT: ld r3, 120(r1)
+; BE-NEXT: li r4, 1
+; BE-NEXT: rldic r4, r4, 63, 0
+; BE-NEXT: xor r3, r3, r4
+; BE-NEXT: b .LBB17_3
+; BE-NEXT: .LBB17_2:
+; BE-NEXT: ld r3, 112(r1)
+; BE-NEXT: .LBB17_3:
+; BE-NEXT: addi r1, r1, 128
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %a = load half, ptr %p, align 2
+ %r = fptoui half %a to i64
+ ret i64 %r
+}
+define void @test_uitofp_i64(i64 %a, ptr %p) nounwind {
+; PPC32-LABEL: test_uitofp_i64:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT: mr r30, r5
+; PPC32-NEXT: bl __floatundisf
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: sth r3, 0(r30)
+; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: test_uitofp_i64:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; P8-NEXT: stdu r1, -48(r1)
+; P8-NEXT: mtfprd f0, r3
+; P8-NEXT: std r0, 64(r1)
+; P8-NEXT: mr r30, r4
+; P8-NEXT: xscvuxdsp f1, f0
+; P8-NEXT: bl __truncsfhf2
+; P8-NEXT: nop
+; P8-NEXT: sth r3, 0(r30)
+; P8-NEXT: addi r1, r1, 48
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: test_uitofp_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: xscvuxdsp f0, f0
+; CHECK-NEXT: xscvdphp f0, f0
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: sth r3, 0(r4)
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: test_uitofp_i64:
+; SOFT: # %bb.0:
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT: stdu r1, -48(r1)
+; SOFT-NEXT: std r0, 64(r1)
+; SOFT-NEXT: mr r30, r4
+; SOFT-NEXT: bl __floatundisf
+; SOFT-NEXT: nop
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: clrldi r3, r3, 48
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: sth r3, 0(r30)
+; SOFT-NEXT: addi r1, r1, 48
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: blr
+;
+; BE-LABEL: test_uitofp_i64:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -144(r1)
+; BE-NEXT: sradi r5, r3, 53
+; BE-NEXT: std r0, 160(r1)
+; BE-NEXT: addi r5, r5, 1
+; BE-NEXT: cmpldi r5, 1
+; BE-NEXT: std r30, 128(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r4
+; BE-NEXT: bgt cr0, .LBB18_2
+; BE-NEXT: # %bb.1:
+; BE-NEXT: mr r4, r3
+; BE-NEXT: b .LBB18_3
+; BE-NEXT: .LBB18_2:
+; BE-NEXT: clrldi r4, r3, 53
+; BE-NEXT: addi r4, r4, 2047
+; BE-NEXT: or r4, r4, r3
+; BE-NEXT: rldicr r4, r4, 0, 52
+; BE-NEXT: .LBB18_3:
+; BE-NEXT: rldicl r5, r3, 10, 54
+; BE-NEXT: clrldi r6, r3, 63
+; BE-NEXT: std r4, 112(r1)
+; BE-NEXT: addi r5, r5, 1
+; BE-NEXT: cmpldi r5, 1
+; BE-NEXT: rldicl r5, r3, 63, 1
+; BE-NEXT: or r4, r6, r5
+; BE-NEXT: ble cr0, .LBB18_5
+; BE-NEXT: # %bb.4:
+; BE-NEXT: clrldi r4, r4, 53
+; BE-NEXT: addi r4, r4, 2047
+; BE-NEXT: or r4, r4, r5
+; BE-NEXT: rldicl r4, r4, 53, 11
+; BE-NEXT: rldicl r4, r4, 11, 1
+; BE-NEXT: .LBB18_5:
+; BE-NEXT: cmpdi r3, 0
+; BE-NEXT: std r4, 120(r1)
+; BE-NEXT: bc 12, lt, .LBB18_7
+; BE-NEXT: # %bb.6:
+; BE-NEXT: lfd f0, 112(r1)
+; BE-NEXT: fcfid f0, f0
+; BE-NEXT: frsp f1, f0
+; BE-NEXT: b .LBB18_8
+; BE-NEXT: .LBB18_7:
+; BE-NEXT: lfd f0, 120(r1)
+; BE-NEXT: fcfid f0, f0
+; BE-NEXT: frsp f0, f0
+; BE-NEXT: fadds f1, f0, f0
+; BE-NEXT: .LBB18_8:
+; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: nop
+; BE-NEXT: sth r3, 0(r30)
+; BE-NEXT: ld r30, 128(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 144
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %r = uitofp i64 %a to half
+ store half %r, ptr %p
+ ret void
+}
+define <4 x float> @test_extend32_vec4(ptr %p) nounwind {
+; PPC32-LABEL: test_extend32_vec4:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -48(r1)
+; PPC32-NEXT: stw r0, 52(r1)
+; PPC32-NEXT: stw r30, 16(r1) # 4-byte Folded Spill
+; PPC32-NEXT: mr r30, r3
+; PPC32-NEXT: lhz r3, 0(r3)
+; PPC32-NEXT: stfd f29, 24(r1) # 8-byte Folded Spill
+; PPC32-NEXT: stfd f30, 32(r1) # 8-byte Folded Spill
+; PPC32-NEXT: stfd f31, 40(r1) # 8-byte Folded Spill
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lhz r3, 2(r30)
+; PPC32-NEXT: fmr f31, f1
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lhz r3, 4(r30)
+; PPC32-NEXT: fmr f30, f1
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lhz r3, 6(r30)
+; PPC32-NEXT: fmr f29, f1
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: fmr f4, f1
+; PPC32-NEXT: fmr f1, f31
+; PPC32-NEXT: fmr f2, f30
+; PPC32-NEXT: fmr f3, f29
+; PPC32-NEXT: lfd f31, 40(r1) # 8-byte Folded Reload
+; PPC32-NEXT: lfd f30, 32(r1) # 8-byte Folded Reload
+; PPC32-NEXT: lfd f29, 24(r1) # 8-byte Folded Reload
+; PPC32-NEXT: lwz r30, 16(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r0, 52(r1)
+; PPC32-NEXT: addi r1, r1, 48
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: test_extend32_vec4:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: stdu r1, -112(r1)
+; P8-NEXT: li r4, 48
+; P8-NEXT: std r0, 128(r1)
+; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill
+; P8-NEXT: mr r30, r3
+; P8-NEXT: lhz r3, 6(r3)
+; P8-NEXT: stxvd2x vs61, r1, r4 # 16-byte Folded Spill
+; P8-NEXT: li r4, 64
+; P8-NEXT: stxvd2x vs62, r1, r4 # 16-byte Folded Spill
+; P8-NEXT: li r4, 80
+; P8-NEXT: stxvd2x vs63, r1, r4 # 16-byte Folded Spill
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: lhz r3, 2(r30)
+; P8-NEXT: xxlor vs63, f1, f1
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: lhz r3, 4(r30)
+; P8-NEXT: xxlor vs62, f1, f1
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: lhz r3, 0(r30)
+; P8-NEXT: xxlor vs61, f1, f1
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: li r3, 80
+; P8-NEXT: xxmrghd vs0, vs61, vs1
+; P8-NEXT: xxmrghd vs1, vs63, vs62
+; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
+; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload
+; P8-NEXT: li r3, 64
+; P8-NEXT: xvcvdpsp vs34, vs0
+; P8-NEXT: xvcvdpsp vs35, vs1
+; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload
+; P8-NEXT: li r3, 48
+; P8-NEXT: lxvd2x vs61, r1, r3 # 16-byte Folded Reload
+; P8-NEXT: vmrgew v2, v3, v2
+; P8-NEXT: addi r1, r1, 112
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: test_extend32_vec4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lhz r4, 6(r3)
+; CHECK-NEXT: mtfprwz f0, r4
+; CHECK-NEXT: xscvhpdp f0, f0
+; CHECK-NEXT: lhz r4, 2(r3)
+; CHECK-NEXT: mtfprwz f1, r4
+; CHECK-NEXT: xscvhpdp f1, f1
+; CHECK-NEXT: lhz r4, 4(r3)
+; CHECK-NEXT: mtfprwz f2, r4
+; CHECK-NEXT: xscvhpdp f2, f2
+; CHECK-NEXT: lhz r3, 0(r3)
+; CHECK-NEXT: xxmrghd vs0, vs0, vs1
+; CHECK-NEXT: mtfprwz f3, r3
+; CHECK-NEXT: xvcvdpsp vs35, vs0
+; CHECK-NEXT: xscvhpdp f3, f3
+; CHECK-NEXT: xxmrghd vs2, vs2, vs3
+; CHECK-NEXT: xvcvdpsp vs34, vs2
+; CHECK-NEXT: vmrgew v2, v3, v2
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: test_extend32_vec4:
+; SOFT: # %bb.0:
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill
+; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill
+; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill
+; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT: stdu r1, -80(r1)
+; SOFT-NEXT: std r0, 96(r1)
+; SOFT-NEXT: mr r30, r3
+; SOFT-NEXT: lhz r3, 0(r3)
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r29, r3
+; SOFT-NEXT: lhz r3, 2(r30)
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r28, r3
+; SOFT-NEXT: lhz r3, 4(r30)
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r27, r3
+; SOFT-NEXT: lhz r3, 6(r30)
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r6, r3
+; SOFT-NEXT: mr r3, r29
+; SOFT-NEXT: mr r4, r28
+; SOFT-NEXT: mr r5, r27
+; SOFT-NEXT: addi r1, r1, 80
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
+; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
+; SOFT-NEXT: blr
+;
+; BE-LABEL: test_extend32_vec4:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -176(r1)
+; BE-NEXT: std r0, 192(r1)
+; BE-NEXT: std r30, 136(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r3
+; BE-NEXT: lhz r3, 0(r3)
+; BE-NEXT: stfd f29, 152(r1) # 8-byte Folded Spill
+; BE-NEXT: stfd f30, 160(r1) # 8-byte Folded Spill
+; BE-NEXT: stfd f31, 168(r1) # 8-byte Folded Spill
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: lhz r3, 2(r30)
+; BE-NEXT: fmr f31, f1
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: lhz r3, 4(r30)
+; BE-NEXT: fmr f30, f1
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: lhz r3, 6(r30)
+; BE-NEXT: fmr f29, f1
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: stfs f29, 120(r1)
+; BE-NEXT: addi r3, r1, 112
+; BE-NEXT: stfs f30, 116(r1)
+; BE-NEXT: stfs f31, 112(r1)
+; BE-NEXT: stfs f1, 124(r1)
+; BE-NEXT: lvx v2, 0, r3
+; BE-NEXT: lfd f31, 168(r1) # 8-byte Folded Reload
+; BE-NEXT: lfd f30, 160(r1) # 8-byte Folded Reload
+; BE-NEXT: lfd f29, 152(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r30, 136(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 176
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %a = load <4 x half>, ptr %p, align 8
+ %b = fpext <4 x half> %a to <4 x float>
+ ret <4 x float> %b
+}
+define <4 x double> @test_extend64_vec4(ptr %p) nounwind {
+; PPC32-LABEL: test_extend64_vec4:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -48(r1)
+; PPC32-NEXT: stw r0, 52(r1)
+; PPC32-NEXT: stw r30, 16(r1) # 4-byte Folded Spill
+; PPC32-NEXT: mr r30, r3
+; PPC32-NEXT: lhz r3, 0(r3)
+; PPC32-NEXT: stfd f29, 24(r1) # 8-byte Folded Spill
+; PPC32-NEXT: stfd f30, 32(r1) # 8-byte Folded Spill
+; PPC32-NEXT: stfd f31, 40(r1) # 8-byte Folded Spill
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lhz r3, 2(r30)
+; PPC32-NEXT: fmr f31, f1
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lhz r3, 4(r30)
+; PPC32-NEXT: fmr f30, f1
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lhz r3, 6(r30)
+; PPC32-NEXT: fmr f29, f1
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: fmr f4, f1
+; PPC32-NEXT: fmr f1, f31
+; PPC32-NEXT: fmr f2, f30
+; PPC32-NEXT: fmr f3, f29
+; PPC32-NEXT: lfd f31, 40(r1) # 8-byte Folded Reload
+; PPC32-NEXT: lfd f30, 32(r1) # 8-byte Folded Reload
+; PPC32-NEXT: lfd f29, 24(r1) # 8-byte Folded Reload
+; PPC32-NEXT: lwz r30, 16(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r0, 52(r1)
+; PPC32-NEXT: addi r1, r1, 48
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: test_extend64_vec4:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: stdu r1, -112(r1)
+; P8-NEXT: li r4, 48
+; P8-NEXT: std r0, 128(r1)
+; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill
+; P8-NEXT: mr r30, r3
+; P8-NEXT: lhz r3, 6(r3)
+; P8-NEXT: stxvd2x vs61, r1, r4 # 16-byte Folded Spill
+; P8-NEXT: li r4, 64
+; P8-NEXT: stxvd2x vs62, r1, r4 # 16-byte Folded Spill
+; P8-NEXT: li r4, 80
+; P8-NEXT: stxvd2x vs63, r1, r4 # 16-byte Folded Spill
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: lhz r3, 4(r30)
+; P8-NEXT: xxlor vs63, f1, f1
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: lhz r3, 2(r30)
+; P8-NEXT: xxlor vs62, f1, f1
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: lhz r3, 0(r30)
+; P8-NEXT: xxlor vs61, f1, f1
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: li r3, 80
+; P8-NEXT: xxmrghd vs35, vs63, vs62
+; P8-NEXT: xxmrghd vs34, vs61, vs1
+; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
+; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload
+; P8-NEXT: li r3, 64
+; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload
+; P8-NEXT: li r3, 48
+; P8-NEXT: lxvd2x vs61, r1, r3 # 16-byte Folded Reload
+; P8-NEXT: addi r1, r1, 112
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: test_extend64_vec4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lhz r4, 6(r3)
+; CHECK-NEXT: lhz r5, 4(r3)
+; CHECK-NEXT: lhz r6, 2(r3)
+; CHECK-NEXT: lhz r3, 0(r3)
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: mtfprwz f1, r6
+; CHECK-NEXT: xscvhpdp f0, f0
+; CHECK-NEXT: xscvhpdp f1, f1
+; CHECK-NEXT: xxmrghd vs34, vs1, vs0
+; CHECK-NEXT: mtfprwz f0, r5
+; CHECK-NEXT: mtfprwz f1, r4
+; CHECK-NEXT: xscvhpdp f0, f0
+; CHECK-NEXT: xscvhpdp f1, f1
+; CHECK-NEXT: xxmrghd vs35, vs1, vs0
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: test_extend64_vec4:
+; SOFT: # %bb.0:
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill
+; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill
+; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill
+; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT: stdu r1, -80(r1)
+; SOFT-NEXT: std r0, 96(r1)
+; SOFT-NEXT: mr r30, r3
+; SOFT-NEXT: lhz r3, 0(r3)
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: bl __extendsfdf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r29, r3
+; SOFT-NEXT: lhz r3, 2(r30)
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: bl __extendsfdf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r28, r3
+; SOFT-NEXT: lhz r3, 4(r30)
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: bl __extendsfdf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r27, r3
+; SOFT-NEXT: lhz r3, 6(r30)
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: bl __extendsfdf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r6, r3
+; SOFT-NEXT: mr r3, r29
+; SOFT-NEXT: mr r4, r28
+; SOFT-NEXT: mr r5, r27
+; SOFT-NEXT: addi r1, r1, 80
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
+; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
+; SOFT-NEXT: blr
+;
+; BE-LABEL: test_extend64_vec4:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -160(r1)
+; BE-NEXT: std r0, 176(r1)
+; BE-NEXT: std r30, 120(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r3
+; BE-NEXT: lhz r3, 6(r3)
+; BE-NEXT: stfd f29, 136(r1) # 8-byte Folded Spill
+; BE-NEXT: stfd f30, 144(r1) # 8-byte Folded Spill
+; BE-NEXT: stfd f31, 152(r1) # 8-byte Folded Spill
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: lhz r3, 4(r30)
+; BE-NEXT: fmr f31, f1
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: lhz r3, 2(r30)
+; BE-NEXT: fmr f30, f1
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: lhz r3, 0(r30)
+; BE-NEXT: fmr f29, f1
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: fmr f2, f29
+; BE-NEXT: fmr f3, f30
+; BE-NEXT: lfd f30, 144(r1) # 8-byte Folded Reload
+; BE-NEXT: lfd f29, 136(r1) # 8-byte Folded Reload
+; BE-NEXT: fmr f4, f31
+; BE-NEXT: lfd f31, 152(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r30, 120(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 160
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %a = load <4 x half>, ptr %p, align 8
+ %b = fpext <4 x half> %a to <4 x double>
+ ret <4 x double> %b
+}
+define void @test_trunc32_vec4(<4 x float> %a, ptr %p) nounwind {
+; PPC32-LABEL: test_trunc32_vec4:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -64(r1)
+; PPC32-NEXT: stw r0, 68(r1)
+; PPC32-NEXT: stw r27, 20(r1) # 4-byte Folded Spill
+; PPC32-NEXT: stw r28, 24(r1) # 4-byte Folded Spill
+; PPC32-NEXT: stw r29, 28(r1) # 4-byte Folded Spill
+; PPC32-NEXT: stw r30, 32(r1) # 4-byte Folded Spill
+; PPC32-NEXT: mr r30, r3
+; PPC32-NEXT: stfd f29, 40(r1) # 8-byte Folded Spill
+; PPC32-NEXT: fmr f29, f2
+; PPC32-NEXT: stfd f30, 48(r1) # 8-byte Folded Spill
+; PPC32-NEXT: fmr f30, f3
+; PPC32-NEXT: stfd f31, 56(r1) # 8-byte Folded Spill
+; PPC32-NEXT: fmr f31, f4
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: fmr f1, f29
+; PPC32-NEXT: mr r29, r3
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: fmr f1, f30
+; PPC32-NEXT: mr r28, r3
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: fmr f1, f31
+; PPC32-NEXT: mr r27, r3
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: sth r27, 4(r30)
+; PPC32-NEXT: sth r28, 2(r30)
+; PPC32-NEXT: sth r3, 6(r30)
+; PPC32-NEXT: sth r29, 0(r30)
+; PPC32-NEXT: lfd f31, 56(r1) # 8-byte Folded Reload
+; PPC32-NEXT: lfd f30, 48(r1) # 8-byte Folded Reload
+; PPC32-NEXT: lfd f29, 40(r1) # 8-byte Folded Reload
+; PPC32-NEXT: lwz r30, 32(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r29, 28(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r28, 24(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r27, 20(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r0, 68(r1)
+; PPC32-NEXT: addi r1, r1, 64
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: test_trunc32_vec4:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: stdu r1, -112(r1)
+; P8-NEXT: xxsldwi vs0, vs34, vs34, 3
+; P8-NEXT: li r3, 48
+; P8-NEXT: std r0, 128(r1)
+; P8-NEXT: std r27, 72(r1) # 8-byte Folded Spill
+; P8-NEXT: std r28, 80(r1) # 8-byte Folded Spill
+; P8-NEXT: std r29, 88(r1) # 8-byte Folded Spill
+; P8-NEXT: xscvspdpn f1, vs0
+; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill
+; P8-NEXT: stxvd2x vs63, r1, r3 # 16-byte Folded Spill
+; P8-NEXT: mr r30, r5
+; P8-NEXT: vmr v31, v2
+; P8-NEXT: bl __truncsfhf2
+; P8-NEXT: nop
+; P8-NEXT: xxswapd vs0, vs63
+; P8-NEXT: mr r29, r3
+; P8-NEXT: xscvspdpn f1, vs0
+; P8-NEXT: bl __truncsfhf2
+; P8-NEXT: nop
+; P8-NEXT: xxsldwi vs0, vs63, vs63, 1
+; P8-NEXT: mr r28, r3
+; P8-NEXT: xscvspdpn f1, vs0
+; P8-NEXT: bl __truncsfhf2
+; P8-NEXT: nop
+; P8-NEXT: xscvspdpn f1, vs63
+; P8-NEXT: mr r27, r3
+; P8-NEXT: bl __truncsfhf2
+; P8-NEXT: nop
+; P8-NEXT: sth r3, 6(r30)
+; P8-NEXT: li r3, 48
+; P8-NEXT: sth r27, 4(r30)
+; P8-NEXT: ld r27, 72(r1) # 8-byte Folded Reload
+; P8-NEXT: sth r28, 2(r30)
+; P8-NEXT: sth r29, 0(r30)
+; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
+; P8-NEXT: ld r29, 88(r1) # 8-byte Folded Reload
+; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload
+; P8-NEXT: ld r28, 80(r1) # 8-byte Folded Reload
+; P8-NEXT: addi r1, r1, 112
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: test_trunc32_vec4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3
+; CHECK-NEXT: xxsldwi vs1, vs34, vs34, 1
+; CHECK-NEXT: xscvspdpn f0, vs0
+; CHECK-NEXT: xscvspdpn f1, vs1
+; CHECK-NEXT: xscvdphp f0, f0
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: xxswapd vs0, vs34
+; CHECK-NEXT: xscvspdpn f0, vs0
+; CHECK-NEXT: xscvdphp f0, f0
+; CHECK-NEXT: xscvdphp f1, f1
+; CHECK-NEXT: mffprwz r4, f1
+; CHECK-NEXT: xscvspdpn f1, vs34
+; CHECK-NEXT: xscvdphp f1, f1
+; CHECK-NEXT: sth r4, 4(r5)
+; CHECK-NEXT: mffprwz r4, f0
+; CHECK-NEXT: sth r3, 0(r5)
+; CHECK-NEXT: sth r4, 2(r5)
+; CHECK-NEXT: mffprwz r6, f1
+; CHECK-NEXT: sth r6, 6(r5)
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: test_trunc32_vec4:
+; SOFT: # %bb.0:
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: std r26, -48(r1) # 8-byte Folded Spill
+; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill
+; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill
+; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill
+; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT: stdu r1, -80(r1)
+; SOFT-NEXT: mr r27, r3
+; SOFT-NEXT: clrldi r3, r6, 32
+; SOFT-NEXT: std r0, 96(r1)
+; SOFT-NEXT: mr r30, r7
+; SOFT-NEXT: mr r29, r5
+; SOFT-NEXT: mr r28, r4
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r26, r3
+; SOFT-NEXT: clrldi r3, r29, 32
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r29, r3
+; SOFT-NEXT: clrldi r3, r28, 32
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r28, r3
+; SOFT-NEXT: clrldi r3, r27, 32
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: clrldi r3, r3, 48
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r27, r3
+; SOFT-NEXT: clrldi r3, r28, 48
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r28, r3
+; SOFT-NEXT: clrldi r3, r29, 48
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r29, r3
+; SOFT-NEXT: clrldi r3, r26, 48
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: sth r3, 6(r30)
+; SOFT-NEXT: mr r3, r29
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: sth r3, 4(r30)
+; SOFT-NEXT: mr r3, r28
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: sth r3, 2(r30)
+; SOFT-NEXT: mr r3, r27
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: sth r3, 0(r30)
+; SOFT-NEXT: addi r1, r1, 80
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
+; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
+; SOFT-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
+; SOFT-NEXT: blr
+;
+; BE-LABEL: test_trunc32_vec4:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -176(r1)
+; BE-NEXT: addi r3, r1, 112
+; BE-NEXT: std r0, 192(r1)
+; BE-NEXT: std r27, 136(r1) # 8-byte Folded Spill
+; BE-NEXT: std r28, 144(r1) # 8-byte Folded Spill
+; BE-NEXT: std r29, 152(r1) # 8-byte Folded Spill
+; BE-NEXT: std r30, 160(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r5
+; BE-NEXT: stvx v2, 0, r3
+; BE-NEXT: lfs f1, 112(r1)
+; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: nop
+; BE-NEXT: lfs f1, 116(r1)
+; BE-NEXT: mr r29, r3
+; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: nop
+; BE-NEXT: lfs f1, 120(r1)
+; BE-NEXT: mr r28, r3
+; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: nop
+; BE-NEXT: lfs f1, 124(r1)
+; BE-NEXT: mr r27, r3
+; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: nop
+; BE-NEXT: sth r27, 4(r30)
+; BE-NEXT: sth r28, 2(r30)
+; BE-NEXT: sth r3, 6(r30)
+; BE-NEXT: sth r29, 0(r30)
+; BE-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r29, 152(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r28, 144(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r27, 136(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 176
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %v = fptrunc <4 x float> %a to <4 x half>
+ store <4 x half> %v, ptr %p
+ ret void
+}
+define void @test_trunc64_vec4(<4 x double> %a, ptr %p) nounwind {
+; PPC32-LABEL: test_trunc64_vec4:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -64(r1)
+; PPC32-NEXT: stw r0, 68(r1)
+; PPC32-NEXT: stw r27, 20(r1) # 4-byte Folded Spill
+; PPC32-NEXT: stw r28, 24(r1) # 4-byte Folded Spill
+; PPC32-NEXT: stw r29, 28(r1) # 4-byte Folded Spill
+; PPC32-NEXT: stw r30, 32(r1) # 4-byte Folded Spill
+; PPC32-NEXT: mr r30, r3
+; PPC32-NEXT: stfd f29, 40(r1) # 8-byte Folded Spill
+; PPC32-NEXT: fmr f29, f2
+; PPC32-NEXT: stfd f30, 48(r1) # 8-byte Folded Spill
+; PPC32-NEXT: fmr f30, f3
+; PPC32-NEXT: stfd f31, 56(r1) # 8-byte Folded Spill
+; PPC32-NEXT: fmr f31, f4
+; PPC32-NEXT: bl __truncdfhf2
+; PPC32-NEXT: fmr f1, f29
+; PPC32-NEXT: mr r29, r3
+; PPC32-NEXT: bl __truncdfhf2
+; PPC32-NEXT: fmr f1, f30
+; PPC32-NEXT: mr r28, r3
+; PPC32-NEXT: bl __truncdfhf2
+; PPC32-NEXT: fmr f1, f31
+; PPC32-NEXT: mr r27, r3
+; PPC32-NEXT: bl __truncdfhf2
+; PPC32-NEXT: sth r27, 4(r30)
+; PPC32-NEXT: sth r28, 2(r30)
+; PPC32-NEXT: sth r3, 6(r30)
+; PPC32-NEXT: sth r29, 0(r30)
+; PPC32-NEXT: lfd f31, 56(r1) # 8-byte Folded Reload
+; PPC32-NEXT: lfd f30, 48(r1) # 8-byte Folded Reload
+; PPC32-NEXT: lfd f29, 40(r1) # 8-byte Folded Reload
+; PPC32-NEXT: lwz r30, 32(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r29, 28(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r28, 24(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r27, 20(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r0, 68(r1)
+; PPC32-NEXT: addi r1, r1, 64
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: test_trunc64_vec4:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: stdu r1, -128(r1)
+; P8-NEXT: li r3, 48
+; P8-NEXT: std r0, 144(r1)
+; P8-NEXT: xxswapd vs1, vs34
+; P8-NEXT: std r27, 88(r1) # 8-byte Folded Spill
+; P8-NEXT: std r28, 96(r1) # 8-byte Folded Spill
+; P8-NEXT: std r29, 104(r1) # 8-byte Folded Spill
+; P8-NEXT: std r30, 112(r1) # 8-byte Folded Spill
+; P8-NEXT: mr r30, r7
+; P8-NEXT: stxvd2x vs62, r1, r3 # 16-byte Folded Spill
+; P8-NEXT: li r3, 64
+; P8-NEXT: vmr v30, v2
+; P8-NEXT: stxvd2x vs63, r1, r3 # 16-byte Folded Spill
+; P8-NEXT: vmr v31, v3
+; P8-NEXT: bl __truncdfhf2
+; P8-NEXT: nop
+; P8-NEXT: xxswapd vs1, vs63
+; P8-NEXT: mr r29, r3
+; P8-NEXT: bl __truncdfhf2
+; P8-NEXT: nop
+; P8-NEXT: xxlor f1, vs62, vs62
+; P8-NEXT: mr r28, r3
+; P8-NEXT: bl __truncdfhf2
+; P8-NEXT: nop
+; P8-NEXT: xxlor f1, vs63, vs63
+; P8-NEXT: mr r27, r3
+; P8-NEXT: bl __truncdfhf2
+; P8-NEXT: nop
+; P8-NEXT: sth r3, 6(r30)
+; P8-NEXT: li r3, 64
+; P8-NEXT: sth r27, 2(r30)
+; P8-NEXT: ld r27, 88(r1) # 8-byte Folded Reload
+; P8-NEXT: sth r28, 4(r30)
+; P8-NEXT: sth r29, 0(r30)
+; P8-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
+; P8-NEXT: ld r29, 104(r1) # 8-byte Folded Reload
+; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload
+; P8-NEXT: li r3, 48
+; P8-NEXT: ld r28, 96(r1) # 8-byte Folded Reload
+; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload
+; P8-NEXT: addi r1, r1, 128
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: test_trunc64_vec4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxswapd vs0, vs34
+; CHECK-NEXT: xscvdphp f0, f0
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: xxswapd vs0, vs35
+; CHECK-NEXT: xscvdphp f0, f0
+; CHECK-NEXT: xscvdphp f1, vs34
+; CHECK-NEXT: mffprwz r4, f1
+; CHECK-NEXT: xscvdphp f1, vs35
+; CHECK-NEXT: sth r3, 0(r7)
+; CHECK-NEXT: sth r4, 2(r7)
+; CHECK-NEXT: mffprwz r4, f0
+; CHECK-NEXT: sth r4, 4(r7)
+; CHECK-NEXT: mffprwz r5, f1
+; CHECK-NEXT: sth r5, 6(r7)
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: test_trunc64_vec4:
+; SOFT: # %bb.0:
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: std r26, -48(r1) # 8-byte Folded Spill
+; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill
+; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill
+; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill
+; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT: stdu r1, -80(r1)
+; SOFT-NEXT: mr r27, r3
+; SOFT-NEXT: mr r3, r6
+; SOFT-NEXT: std r0, 96(r1)
+; SOFT-NEXT: mr r30, r7
+; SOFT-NEXT: mr r29, r5
+; SOFT-NEXT: mr r28, r4
+; SOFT-NEXT: bl __truncdfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r26, r3
+; SOFT-NEXT: mr r3, r29
+; SOFT-NEXT: bl __truncdfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r29, r3
+; SOFT-NEXT: mr r3, r28
+; SOFT-NEXT: bl __truncdfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r28, r3
+; SOFT-NEXT: mr r3, r27
+; SOFT-NEXT: bl __truncdfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: clrldi r3, r3, 48
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r27, r3
+; SOFT-NEXT: clrldi r3, r28, 48
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r28, r3
+; SOFT-NEXT: clrldi r3, r29, 48
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r29, r3
+; SOFT-NEXT: clrldi r3, r26, 48
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: sth r3, 6(r30)
+; SOFT-NEXT: mr r3, r29
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: sth r3, 4(r30)
+; SOFT-NEXT: mr r3, r28
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: sth r3, 2(r30)
+; SOFT-NEXT: mr r3, r27
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: sth r3, 0(r30)
+; SOFT-NEXT: addi r1, r1, 80
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
+; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
+; SOFT-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
+; SOFT-NEXT: blr
+;
+; BE-LABEL: test_trunc64_vec4:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -176(r1)
+; BE-NEXT: std r0, 192(r1)
+; BE-NEXT: std r27, 112(r1) # 8-byte Folded Spill
+; BE-NEXT: std r28, 120(r1) # 8-byte Folded Spill
+; BE-NEXT: std r29, 128(r1) # 8-byte Folded Spill
+; BE-NEXT: std r30, 136(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r7
+; BE-NEXT: stfd f29, 152(r1) # 8-byte Folded Spill
+; BE-NEXT: fmr f29, f2
+; BE-NEXT: stfd f30, 160(r1) # 8-byte Folded Spill
+; BE-NEXT: fmr f30, f3
+; BE-NEXT: stfd f31, 168(r1) # 8-byte Folded Spill
+; BE-NEXT: fmr f31, f4
+; BE-NEXT: bl __truncdfhf2
+; BE-NEXT: nop
+; BE-NEXT: fmr f1, f29
+; BE-NEXT: mr r29, r3
+; BE-NEXT: bl __truncdfhf2
+; BE-NEXT: nop
+; BE-NEXT: fmr f1, f30
+; BE-NEXT: mr r28, r3
+; BE-NEXT: bl __truncdfhf2
+; BE-NEXT: nop
+; BE-NEXT: fmr f1, f31
+; BE-NEXT: mr r27, r3
+; BE-NEXT: bl __truncdfhf2
+; BE-NEXT: nop
+; BE-NEXT: sth r27, 4(r30)
+; BE-NEXT: sth r28, 2(r30)
+; BE-NEXT: sth r3, 6(r30)
+; BE-NEXT: sth r29, 0(r30)
+; BE-NEXT: lfd f31, 168(r1) # 8-byte Folded Reload
+; BE-NEXT: lfd f30, 160(r1) # 8-byte Folded Reload
+; BE-NEXT: lfd f29, 152(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r30, 136(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r29, 128(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r28, 120(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r27, 112(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 176
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %v = fptrunc <4 x double> %a to <4 x half>
+ store <4 x half> %v, ptr %p
+ ret void
+}
+define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind {
+; PPC32-LABEL: test_sitofp_fadd_i32:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -32(r1)
+; PPC32-NEXT: stw r0, 36(r1)
+; PPC32-NEXT: stw r30, 16(r1) # 4-byte Folded Spill
+; PPC32-NEXT: mr r30, r3
+; PPC32-NEXT: lhz r3, 0(r4)
+; PPC32-NEXT: stfd f31, 24(r1) # 8-byte Folded Spill
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lis r3, 17200
+; PPC32-NEXT: stw r3, 8(r1)
+; PPC32-NEXT: xoris r3, r30, 32768
+; PPC32-NEXT: stw r3, 12(r1)
+; PPC32-NEXT: lis r3, .LCPI23_0 at ha
+; PPC32-NEXT: fmr f31, f1
+; PPC32-NEXT: lfd f0, 8(r1)
+; PPC32-NEXT: lfs f1, .LCPI23_0 at l(r3)
+; PPC32-NEXT: fsub f0, f0, f1
+; PPC32-NEXT: frsp f1, f0
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: clrlwi r3, r3, 16
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: fadds f1, f31, f1
+; PPC32-NEXT: lfd f31, 24(r1) # 8-byte Folded Reload
+; PPC32-NEXT: lwz r30, 16(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r0, 36(r1)
+; PPC32-NEXT: addi r1, r1, 32
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: test_sitofp_fadd_i32:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: std r30, -24(r1) # 8-byte Folded Spill
+; P8-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
+; P8-NEXT: stdu r1, -64(r1)
+; P8-NEXT: std r0, 80(r1)
+; P8-NEXT: mr r30, r3
+; P8-NEXT: lhz r3, 0(r4)
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: mtfprwa f0, r30
+; P8-NEXT: fmr f31, f1
+; P8-NEXT: xscvsxdsp f1, f0
+; P8-NEXT: bl __truncsfhf2
+; P8-NEXT: nop
+; P8-NEXT: clrldi r3, r3, 48
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: xsaddsp f1, f31, f1
+; P8-NEXT: addi r1, r1, 64
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
+; P8-NEXT: ld r30, -24(r1) # 8-byte Folded Reload
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: test_sitofp_fadd_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mtfprwa f1, r3
+; CHECK-NEXT: lhz r4, 0(r4)
+; CHECK-NEXT: xscvsxdsp f1, f1
+; CHECK-NEXT: mtfprwz f0, r4
+; CHECK-NEXT: xscvhpdp f0, f0
+; CHECK-NEXT: xscvdphp f1, f1
+; CHECK-NEXT: mffprwz r3, f1
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: mtfprwz f1, r3
+; CHECK-NEXT: xscvhpdp f1, f1
+; CHECK-NEXT: xsaddsp f1, f0, f1
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: test_sitofp_fadd_i32:
+; SOFT: # %bb.0:
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill
+; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT: stdu r1, -64(r1)
+; SOFT-NEXT: std r0, 80(r1)
+; SOFT-NEXT: mr r30, r3
+; SOFT-NEXT: lhz r3, 0(r4)
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r29, r3
+; SOFT-NEXT: extsw r3, r30
+; SOFT-NEXT: bl __floatsisf
+; SOFT-NEXT: nop
+; SOFT-NEXT: clrldi r3, r3, 32
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: clrldi r3, r3, 48
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r4, r3
+; SOFT-NEXT: mr r3, r29
+; SOFT-NEXT: bl __addsf3
+; SOFT-NEXT: nop
+; SOFT-NEXT: addi r1, r1, 64
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: blr
+;
+; BE-LABEL: test_sitofp_fadd_i32:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -144(r1)
+; BE-NEXT: std r0, 160(r1)
+; BE-NEXT: std r30, 120(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r3
+; BE-NEXT: lhz r3, 0(r4)
+; BE-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: extsw r3, r30
+; BE-NEXT: fmr f31, f1
+; BE-NEXT: std r3, 112(r1)
+; BE-NEXT: lfd f0, 112(r1)
+; BE-NEXT: fcfid f0, f0
+; BE-NEXT: frsp f1, f0
+; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: nop
+; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: fadds f1, f31, f1
+; BE-NEXT: lfd f31, 136(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r30, 120(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 144
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %tmp0 = load half, ptr %b
+ %tmp1 = sitofp i32 %a to half
+ %tmp2 = fadd half %tmp0, %tmp1
+ %tmp3 = fpext half %tmp2 to float
+ ret float %tmp3
+}
+define half @PR40273(half) nounwind {
+; PPC32-LABEL: PR40273:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: clrlwi r3, r3, 16
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lis r3, .LCPI24_0 at ha
+; PPC32-NEXT: lfs f0, .LCPI24_0 at l(r3)
+; PPC32-NEXT: li r3, 0
+; PPC32-NEXT: fcmpu cr0, f1, f0
+; PPC32-NEXT: bc 12, eq, .LBB24_2
+; PPC32-NEXT: # %bb.1:
+; PPC32-NEXT: li r3, 4
+; PPC32-NEXT: .LBB24_2:
+; PPC32-NEXT: li r4, .LCPI24_1 at l
+; PPC32-NEXT: addis r4, r4, .LCPI24_1 at ha
+; PPC32-NEXT: lfsx f1, r4, r3
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: PR40273:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: stdu r1, -32(r1)
+; P8-NEXT: std r0, 48(r1)
+; P8-NEXT: bl __truncsfhf2
+; P8-NEXT: nop
+; P8-NEXT: clrldi r3, r3, 48
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: fmr f0, f1
+; P8-NEXT: xxlxor f1, f1, f1
+; P8-NEXT: fcmpu cr0, f0, f1
+; P8-NEXT: beq cr0, .LBB24_2
+; P8-NEXT: # %bb.1:
+; P8-NEXT: vspltisw v2, 1
+; P8-NEXT: xvcvsxwdp vs1, vs34
+; P8-NEXT: .LBB24_2:
+; P8-NEXT: addi r1, r1, 32
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: PR40273:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: xxlxor f1, f1, f1
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: xscvhpdp f0, f0
+; CHECK-NEXT: fcmpu cr0, f0, f1
+; CHECK-NEXT: beqlr cr0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vspltisw v2, 1
+; CHECK-NEXT: xvcvsxwdp vs1, vs34
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: PR40273:
+; SOFT: # %bb.0:
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: stdu r1, -32(r1)
+; SOFT-NEXT: clrldi r3, r3, 48
+; SOFT-NEXT: std r0, 48(r1)
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: li r4, 0
+; SOFT-NEXT: bl __nesf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: cmplwi r3, 0
+; SOFT-NEXT: lis r3, 16256
+; SOFT-NEXT: iseleq r3, 0, r3
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: addi r1, r1, 32
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: blr
+;
+; BE-LABEL: PR40273:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: nop
+; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: addis r3, r2, .LCPI24_0 at toc@ha
+; BE-NEXT: lfs f0, .LCPI24_0 at toc@l(r3)
+; BE-NEXT: li r3, 0
+; BE-NEXT: fcmpu cr0, f1, f0
+; BE-NEXT: bc 12, eq, .LBB24_2
+; BE-NEXT: # %bb.1:
+; BE-NEXT: li r3, 4
+; BE-NEXT: .LBB24_2:
+; BE-NEXT: addis r4, r2, .LCPI24_1 at toc@ha
+; BE-NEXT: addi r4, r4, .LCPI24_1 at toc@l
+; BE-NEXT: lfsx f1, r4, r3
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %2 = fcmp une half %0, 0xH0000
+ %3 = uitofp i1 %2 to half
+ ret half %3
+}
+
+; Trivial operations shouldn't need a libcall
+
+define half @fabs(half %x) nounwind {
+; PPC32-LABEL: fabs:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: clrlwi r3, r3, 16
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: fabs f1, f1
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: fabs:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: stdu r1, -32(r1)
+; P8-NEXT: std r0, 48(r1)
+; P8-NEXT: bl __truncsfhf2
+; P8-NEXT: nop
+; P8-NEXT: clrldi r3, r3, 48
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: xsabsdp f1, f1
+; P8-NEXT: addi r1, r1, 32
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: fabs:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: xscvhpdp f0, f0
+; CHECK-NEXT: xsabsdp f1, f0
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: fabs:
+; SOFT: # %bb.0:
+; SOFT-NEXT: clrldi r3, r3, 49
+; SOFT-NEXT: blr
+;
+; BE-LABEL: fabs:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: nop
+; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: fabs f1, f1
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %a = call half @llvm.fabs.f16(half %x)
+ ret half %a
+}
+
+define half @fcopysign(half %x, half %y) nounwind {
+; PPC32-LABEL: fcopysign:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -32(r1)
+; PPC32-NEXT: stw r0, 36(r1)
+; PPC32-NEXT: stfd f31, 24(r1) # 8-byte Folded Spill
+; PPC32-NEXT: fmr f31, f2
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: clrlwi r3, r3, 16
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: stfs f31, 20(r1)
+; PPC32-NEXT: lwz r3, 20(r1)
+; PPC32-NEXT: srwi r3, r3, 31
+; PPC32-NEXT: andi. r3, r3, 1
+; PPC32-NEXT: bc 12, gt, .LBB26_2
+; PPC32-NEXT: # %bb.1:
+; PPC32-NEXT: fabs f1, f1
+; PPC32-NEXT: b .LBB26_3
+; PPC32-NEXT: .LBB26_2:
+; PPC32-NEXT: fnabs f1, f1
+; PPC32-NEXT: .LBB26_3:
+; PPC32-NEXT: lfd f31, 24(r1) # 8-byte Folded Reload
+; PPC32-NEXT: lwz r0, 36(r1)
+; PPC32-NEXT: addi r1, r1, 32
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: fcopysign:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
+; P8-NEXT: stdu r1, -48(r1)
+; P8-NEXT: std r0, 64(r1)
+; P8-NEXT: fmr f31, f2
+; P8-NEXT: bl __truncsfhf2
+; P8-NEXT: nop
+; P8-NEXT: clrldi r3, r3, 48
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: fcpsgn f1, f31, f1
+; P8-NEXT: addi r1, r1, 48
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: fcopysign:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: xscvhpdp f0, f0
+; CHECK-NEXT: fcpsgn f1, f2, f0
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: fcopysign:
+; SOFT: # %bb.0:
+; SOFT-NEXT: mflr r0
+; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT: stdu r1, -48(r1)
+; SOFT-NEXT: clrldi r3, r3, 48
+; SOFT-NEXT: std r0, 64(r1)
+; SOFT-NEXT: mr r30, r4
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: rlwimi r3, r30, 16, 0, 0
+; SOFT-NEXT: clrldi r3, r3, 32
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: addi r1, r1, 48
+; SOFT-NEXT: ld r0, 16(r1)
+; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: blr
+;
+; BE-LABEL: fcopysign:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -128(r1)
+; BE-NEXT: std r0, 144(r1)
+; BE-NEXT: stfd f31, 120(r1) # 8-byte Folded Spill
+; BE-NEXT: fmr f31, f2
+; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: nop
+; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: stfs f31, 116(r1)
+; BE-NEXT: lwz r3, 116(r1)
+; BE-NEXT: srwi r3, r3, 31
+; BE-NEXT: andi. r3, r3, 1
+; BE-NEXT: bc 12, gt, .LBB26_2
+; BE-NEXT: # %bb.1:
+; BE-NEXT: fabs f1, f1
+; BE-NEXT: b .LBB26_3
+; BE-NEXT: .LBB26_2:
+; BE-NEXT: fnabs f1, f1
+; BE-NEXT: .LBB26_3:
+; BE-NEXT: lfd f31, 120(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 128
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %a = call half @llvm.copysign.f16(half %x, half %y)
+ ret half %a
+}
diff --git a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
deleted file mode 100644
index 50f05cca80458..0000000000000
--- a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
+++ /dev/null
@@ -1,1281 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
-; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
-; RUN: --check-prefix=P8
-; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
-; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s
-; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -mattr=-hard-float \
-; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
-; RUN: --check-prefix=SOFT
-
-; Tests for various operations on half precison float. Much of the test is
-; copied from test/CodeGen/X86/half.ll.
-define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr #0 {
-; P8-LABEL: loadd:
-; P8: # %bb.0: # %entry
-; P8-NEXT: mflr r0
-; P8-NEXT: stdu r1, -32(r1)
-; P8-NEXT: std r0, 48(r1)
-; P8-NEXT: lhz r3, 2(r3)
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: addi r1, r1, 32
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: mtlr r0
-; P8-NEXT: blr
-;
-; CHECK-LABEL: loadd:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi r3, r3, 2
-; CHECK-NEXT: lxsihzx f0, 0, r3
-; CHECK-NEXT: xscvhpdp f1, f0
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: loadd:
-; SOFT: # %bb.0: # %entry
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: stdu r1, -32(r1)
-; SOFT-NEXT: std r0, 48(r1)
-; SOFT-NEXT: lhz r3, 2(r3)
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __extendsfdf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: addi r1, r1, 32
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: mtlr r0
-; SOFT-NEXT: blr
-entry:
- %arrayidx = getelementptr inbounds i16, ptr %a, i64 1
- %0 = load i16, ptr %arrayidx, align 2
- %1 = tail call double @llvm.convert.from.fp16.f64(i16 %0)
- ret double %1
-}
-
-declare double @llvm.convert.from.fp16.f64(i16)
-
-define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr #0 {
-; P8-LABEL: loadf:
-; P8: # %bb.0: # %entry
-; P8-NEXT: mflr r0
-; P8-NEXT: stdu r1, -32(r1)
-; P8-NEXT: std r0, 48(r1)
-; P8-NEXT: lhz r3, 2(r3)
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: addi r1, r1, 32
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: mtlr r0
-; P8-NEXT: blr
-;
-; CHECK-LABEL: loadf:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi r3, r3, 2
-; CHECK-NEXT: lxsihzx f0, 0, r3
-; CHECK-NEXT: xscvhpdp f1, f0
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: loadf:
-; SOFT: # %bb.0: # %entry
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: stdu r1, -32(r1)
-; SOFT-NEXT: std r0, 48(r1)
-; SOFT-NEXT: lhz r3, 2(r3)
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: addi r1, r1, 32
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: mtlr r0
-; SOFT-NEXT: blr
-entry:
- %arrayidx = getelementptr inbounds i16, ptr %a, i64 1
- %0 = load i16, ptr %arrayidx, align 2
- %1 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
- ret float %1
-}
-
-declare float @llvm.convert.from.fp16.f32(i16)
-
-define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr #0 {
-; P8-LABEL: stored:
-; P8: # %bb.0: # %entry
-; P8-NEXT: mflr r0
-; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT: stdu r1, -48(r1)
-; P8-NEXT: std r0, 64(r1)
-; P8-NEXT: mr r30, r3
-; P8-NEXT: bl __truncdfhf2
-; P8-NEXT: nop
-; P8-NEXT: sth r3, 0(r30)
-; P8-NEXT: addi r1, r1, 48
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; P8-NEXT: mtlr r0
-; P8-NEXT: blr
-;
-; CHECK-LABEL: stored:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xscvdphp f0, f1
-; CHECK-NEXT: stxsihx f0, 0, r3
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: stored:
-; SOFT: # %bb.0: # %entry
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT: stdu r1, -48(r1)
-; SOFT-NEXT: mr r30, r3
-; SOFT-NEXT: mr r3, r4
-; SOFT-NEXT: std r0, 64(r1)
-; SOFT-NEXT: bl __truncdfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 0(r30)
-; SOFT-NEXT: addi r1, r1, 48
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT: mtlr r0
-; SOFT-NEXT: blr
-entry:
- %0 = tail call i16 @llvm.convert.to.fp16.f64(double %b)
- store i16 %0, ptr %a, align 2
- ret void
-}
-
-declare i16 @llvm.convert.to.fp16.f64(double)
-
-define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr #0 {
-; P8-LABEL: storef:
-; P8: # %bb.0: # %entry
-; P8-NEXT: mflr r0
-; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT: stdu r1, -48(r1)
-; P8-NEXT: std r0, 64(r1)
-; P8-NEXT: mr r30, r3
-; P8-NEXT: bl __truncsfhf2
-; P8-NEXT: nop
-; P8-NEXT: sth r3, 0(r30)
-; P8-NEXT: addi r1, r1, 48
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; P8-NEXT: mtlr r0
-; P8-NEXT: blr
-;
-; CHECK-LABEL: storef:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xscvdphp f0, f1
-; CHECK-NEXT: stxsihx f0, 0, r3
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: storef:
-; SOFT: # %bb.0: # %entry
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT: stdu r1, -48(r1)
-; SOFT-NEXT: mr r30, r3
-; SOFT-NEXT: clrldi r3, r4, 32
-; SOFT-NEXT: std r0, 64(r1)
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 0(r30)
-; SOFT-NEXT: addi r1, r1, 48
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT: mtlr r0
-; SOFT-NEXT: blr
-entry:
- %0 = tail call i16 @llvm.convert.to.fp16.f32(float %b)
- store i16 %0, ptr %a, align 2
- ret void
-}
-
-declare i16 @llvm.convert.to.fp16.f32(float)
-define void @test_load_store(ptr %in, ptr %out) #0 {
-; P8-LABEL: test_load_store:
-; P8: # %bb.0:
-; P8-NEXT: lhz r3, 0(r3)
-; P8-NEXT: sth r3, 0(r4)
-; P8-NEXT: blr
-;
-; CHECK-LABEL: test_load_store:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lhz r3, 0(r3)
-; CHECK-NEXT: sth r3, 0(r4)
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: test_load_store:
-; SOFT: # %bb.0:
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT: stdu r1, -48(r1)
-; SOFT-NEXT: std r0, 64(r1)
-; SOFT-NEXT: mr r30, r4
-; SOFT-NEXT: lhz r3, 0(r3)
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 0(r30)
-; SOFT-NEXT: addi r1, r1, 48
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT: mtlr r0
-; SOFT-NEXT: blr
- %val = load half, ptr %in
- store half %val, ptr %out
- ret void
-}
-define i16 @test_bitcast_from_half(ptr %addr) #0 {
-; P8-LABEL: test_bitcast_from_half:
-; P8: # %bb.0:
-; P8-NEXT: lhz r3, 0(r3)
-; P8-NEXT: blr
-;
-; CHECK-LABEL: test_bitcast_from_half:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lhz r3, 0(r3)
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: test_bitcast_from_half:
-; SOFT: # %bb.0:
-; SOFT-NEXT: lhz r3, 0(r3)
-; SOFT-NEXT: blr
- %val = load half, ptr %addr
- %val_int = bitcast half %val to i16
- ret i16 %val_int
-}
-define void @test_bitcast_to_half(ptr %addr, i16 %in) #0 {
-; P8-LABEL: test_bitcast_to_half:
-; P8: # %bb.0:
-; P8-NEXT: sth r4, 0(r3)
-; P8-NEXT: blr
-;
-; CHECK-LABEL: test_bitcast_to_half:
-; CHECK: # %bb.0:
-; CHECK-NEXT: sth r4, 0(r3)
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: test_bitcast_to_half:
-; SOFT: # %bb.0:
-; SOFT-NEXT: sth r4, 0(r3)
-; SOFT-NEXT: blr
- %val_fp = bitcast i16 %in to half
- store half %val_fp, ptr %addr
- ret void
-}
-define float @test_extend32(ptr %addr) #0 {
-; P8-LABEL: test_extend32:
-; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: stdu r1, -32(r1)
-; P8-NEXT: std r0, 48(r1)
-; P8-NEXT: lhz r3, 0(r3)
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: addi r1, r1, 32
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: mtlr r0
-; P8-NEXT: blr
-;
-; CHECK-LABEL: test_extend32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lxsihzx f0, 0, r3
-; CHECK-NEXT: xscvhpdp f1, f0
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: test_extend32:
-; SOFT: # %bb.0:
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: stdu r1, -32(r1)
-; SOFT-NEXT: std r0, 48(r1)
-; SOFT-NEXT: lhz r3, 0(r3)
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: addi r1, r1, 32
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: mtlr r0
-; SOFT-NEXT: blr
- %val16 = load half, ptr %addr
- %val32 = fpext half %val16 to float
- ret float %val32
-}
-define double @test_extend64(ptr %addr) #0 {
-; P8-LABEL: test_extend64:
-; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: stdu r1, -32(r1)
-; P8-NEXT: std r0, 48(r1)
-; P8-NEXT: lhz r3, 0(r3)
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: addi r1, r1, 32
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: mtlr r0
-; P8-NEXT: blr
-;
-; CHECK-LABEL: test_extend64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lxsihzx f0, 0, r3
-; CHECK-NEXT: xscvhpdp f1, f0
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: test_extend64:
-; SOFT: # %bb.0:
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: stdu r1, -32(r1)
-; SOFT-NEXT: std r0, 48(r1)
-; SOFT-NEXT: lhz r3, 0(r3)
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __extendsfdf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: addi r1, r1, 32
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: mtlr r0
-; SOFT-NEXT: blr
- %val16 = load half, ptr %addr
- %val32 = fpext half %val16 to double
- ret double %val32
-}
-define void @test_trunc32(float %in, ptr %addr) #0 {
-; P8-LABEL: test_trunc32:
-; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT: stdu r1, -48(r1)
-; P8-NEXT: std r0, 64(r1)
-; P8-NEXT: mr r30, r4
-; P8-NEXT: bl __truncsfhf2
-; P8-NEXT: nop
-; P8-NEXT: sth r3, 0(r30)
-; P8-NEXT: addi r1, r1, 48
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; P8-NEXT: mtlr r0
-; P8-NEXT: blr
-;
-; CHECK-LABEL: test_trunc32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xscvdphp f0, f1
-; CHECK-NEXT: stxsihx f0, 0, r4
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: test_trunc32:
-; SOFT: # %bb.0:
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT: stdu r1, -48(r1)
-; SOFT-NEXT: clrldi r3, r3, 32
-; SOFT-NEXT: std r0, 64(r1)
-; SOFT-NEXT: mr r30, r4
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 0(r30)
-; SOFT-NEXT: addi r1, r1, 48
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT: mtlr r0
-; SOFT-NEXT: blr
- %val16 = fptrunc float %in to half
- store half %val16, ptr %addr
- ret void
-}
-define void @test_trunc64(double %in, ptr %addr) #0 {
-; P8-LABEL: test_trunc64:
-; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT: stdu r1, -48(r1)
-; P8-NEXT: std r0, 64(r1)
-; P8-NEXT: mr r30, r4
-; P8-NEXT: bl __truncdfhf2
-; P8-NEXT: nop
-; P8-NEXT: sth r3, 0(r30)
-; P8-NEXT: addi r1, r1, 48
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; P8-NEXT: mtlr r0
-; P8-NEXT: blr
-;
-; CHECK-LABEL: test_trunc64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xscvdphp f0, f1
-; CHECK-NEXT: stxsihx f0, 0, r4
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: test_trunc64:
-; SOFT: # %bb.0:
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT: stdu r1, -48(r1)
-; SOFT-NEXT: std r0, 64(r1)
-; SOFT-NEXT: mr r30, r4
-; SOFT-NEXT: bl __truncdfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 0(r30)
-; SOFT-NEXT: addi r1, r1, 48
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT: mtlr r0
-; SOFT-NEXT: blr
- %val16 = fptrunc double %in to half
- store half %val16, ptr %addr
- ret void
-}
-define i64 @test_fptosi_i64(ptr %p) #0 {
-; P8-LABEL: test_fptosi_i64:
-; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: stdu r1, -32(r1)
-; P8-NEXT: std r0, 48(r1)
-; P8-NEXT: lhz r3, 0(r3)
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: xscvdpsxds f0, f1
-; P8-NEXT: mffprd r3, f0
-; P8-NEXT: addi r1, r1, 32
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: mtlr r0
-; P8-NEXT: blr
-;
-; CHECK-LABEL: test_fptosi_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lhz r3, 0(r3)
-; CHECK-NEXT: mtfprwz f0, r3
-; CHECK-NEXT: xscvhpdp f0, f0
-; CHECK-NEXT: xscvdpsxds f0, f0
-; CHECK-NEXT: mffprd r3, f0
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: test_fptosi_i64:
-; SOFT: # %bb.0:
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: stdu r1, -32(r1)
-; SOFT-NEXT: std r0, 48(r1)
-; SOFT-NEXT: lhz r3, 0(r3)
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __fixsfdi
-; SOFT-NEXT: nop
-; SOFT-NEXT: addi r1, r1, 32
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: mtlr r0
-; SOFT-NEXT: blr
- %a = load half, ptr %p, align 2
- %r = fptosi half %a to i64
- ret i64 %r
-}
-define void @test_sitofp_i64(i64 %a, ptr %p) #0 {
-; P8-LABEL: test_sitofp_i64:
-; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT: stdu r1, -48(r1)
-; P8-NEXT: mtfprd f0, r3
-; P8-NEXT: std r0, 64(r1)
-; P8-NEXT: mr r30, r4
-; P8-NEXT: xscvsxdsp f1, f0
-; P8-NEXT: bl __truncsfhf2
-; P8-NEXT: nop
-; P8-NEXT: sth r3, 0(r30)
-; P8-NEXT: addi r1, r1, 48
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; P8-NEXT: mtlr r0
-; P8-NEXT: blr
-;
-; CHECK-LABEL: test_sitofp_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xscvsxdsp f0, f0
-; CHECK-NEXT: xscvdphp f0, f0
-; CHECK-NEXT: mffprwz r3, f0
-; CHECK-NEXT: sth r3, 0(r4)
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: test_sitofp_i64:
-; SOFT: # %bb.0:
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT: stdu r1, -48(r1)
-; SOFT-NEXT: std r0, 64(r1)
-; SOFT-NEXT: mr r30, r4
-; SOFT-NEXT: bl __floatdisf
-; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 32
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 0(r30)
-; SOFT-NEXT: addi r1, r1, 48
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT: mtlr r0
-; SOFT-NEXT: blr
- %r = sitofp i64 %a to half
- store half %r, ptr %p
- ret void
-}
-define i64 @test_fptoui_i64(ptr %p) #0 {
-; P8-LABEL: test_fptoui_i64:
-; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: stdu r1, -32(r1)
-; P8-NEXT: std r0, 48(r1)
-; P8-NEXT: lhz r3, 0(r3)
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: xscvdpuxds f0, f1
-; P8-NEXT: mffprd r3, f0
-; P8-NEXT: addi r1, r1, 32
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: mtlr r0
-; P8-NEXT: blr
-;
-; CHECK-LABEL: test_fptoui_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lhz r3, 0(r3)
-; CHECK-NEXT: mtfprwz f0, r3
-; CHECK-NEXT: xscvhpdp f0, f0
-; CHECK-NEXT: xscvdpuxds f0, f0
-; CHECK-NEXT: mffprd r3, f0
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: test_fptoui_i64:
-; SOFT: # %bb.0:
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: stdu r1, -32(r1)
-; SOFT-NEXT: std r0, 48(r1)
-; SOFT-NEXT: lhz r3, 0(r3)
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __fixunssfdi
-; SOFT-NEXT: nop
-; SOFT-NEXT: addi r1, r1, 32
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: mtlr r0
-; SOFT-NEXT: blr
- %a = load half, ptr %p, align 2
- %r = fptoui half %a to i64
- ret i64 %r
-}
-define void @test_uitofp_i64(i64 %a, ptr %p) #0 {
-; P8-LABEL: test_uitofp_i64:
-; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT: stdu r1, -48(r1)
-; P8-NEXT: mtfprd f0, r3
-; P8-NEXT: std r0, 64(r1)
-; P8-NEXT: mr r30, r4
-; P8-NEXT: xscvuxdsp f1, f0
-; P8-NEXT: bl __truncsfhf2
-; P8-NEXT: nop
-; P8-NEXT: sth r3, 0(r30)
-; P8-NEXT: addi r1, r1, 48
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; P8-NEXT: mtlr r0
-; P8-NEXT: blr
-;
-; CHECK-LABEL: test_uitofp_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xscvuxdsp f0, f0
-; CHECK-NEXT: xscvdphp f0, f0
-; CHECK-NEXT: mffprwz r3, f0
-; CHECK-NEXT: sth r3, 0(r4)
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: test_uitofp_i64:
-; SOFT: # %bb.0:
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT: stdu r1, -48(r1)
-; SOFT-NEXT: std r0, 64(r1)
-; SOFT-NEXT: mr r30, r4
-; SOFT-NEXT: bl __floatundisf
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 0(r30)
-; SOFT-NEXT: addi r1, r1, 48
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT: mtlr r0
-; SOFT-NEXT: blr
- %r = uitofp i64 %a to half
- store half %r, ptr %p
- ret void
-}
-define <4 x float> @test_extend32_vec4(ptr %p) #0 {
-; P8-LABEL: test_extend32_vec4:
-; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: stdu r1, -112(r1)
-; P8-NEXT: li r4, 48
-; P8-NEXT: std r0, 128(r1)
-; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill
-; P8-NEXT: mr r30, r3
-; P8-NEXT: lhz r3, 6(r3)
-; P8-NEXT: stxvd2x vs61, r1, r4 # 16-byte Folded Spill
-; P8-NEXT: li r4, 64
-; P8-NEXT: stxvd2x vs62, r1, r4 # 16-byte Folded Spill
-; P8-NEXT: li r4, 80
-; P8-NEXT: stxvd2x vs63, r1, r4 # 16-byte Folded Spill
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: lhz r3, 2(r30)
-; P8-NEXT: xxlor vs63, f1, f1
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: lhz r3, 4(r30)
-; P8-NEXT: xxlor vs62, f1, f1
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: lhz r3, 0(r30)
-; P8-NEXT: xxlor vs61, f1, f1
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: li r3, 80
-; P8-NEXT: xxmrghd vs0, vs61, vs1
-; P8-NEXT: xxmrghd vs1, vs63, vs62
-; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
-; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload
-; P8-NEXT: li r3, 64
-; P8-NEXT: xvcvdpsp vs34, vs0
-; P8-NEXT: xvcvdpsp vs35, vs1
-; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload
-; P8-NEXT: li r3, 48
-; P8-NEXT: lxvd2x vs61, r1, r3 # 16-byte Folded Reload
-; P8-NEXT: vmrgew v2, v3, v2
-; P8-NEXT: addi r1, r1, 112
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: mtlr r0
-; P8-NEXT: blr
-;
-; CHECK-LABEL: test_extend32_vec4:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lhz r4, 6(r3)
-; CHECK-NEXT: mtfprwz f0, r4
-; CHECK-NEXT: xscvhpdp f0, f0
-; CHECK-NEXT: lhz r4, 2(r3)
-; CHECK-NEXT: mtfprwz f1, r4
-; CHECK-NEXT: xscvhpdp f1, f1
-; CHECK-NEXT: lhz r4, 4(r3)
-; CHECK-NEXT: mtfprwz f2, r4
-; CHECK-NEXT: xscvhpdp f2, f2
-; CHECK-NEXT: lhz r3, 0(r3)
-; CHECK-NEXT: xxmrghd vs0, vs0, vs1
-; CHECK-NEXT: mtfprwz f3, r3
-; CHECK-NEXT: xvcvdpsp vs35, vs0
-; CHECK-NEXT: xscvhpdp f3, f3
-; CHECK-NEXT: xxmrghd vs2, vs2, vs3
-; CHECK-NEXT: xvcvdpsp vs34, vs2
-; CHECK-NEXT: vmrgew v2, v3, v2
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: test_extend32_vec4:
-; SOFT: # %bb.0:
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT: stdu r1, -80(r1)
-; SOFT-NEXT: std r0, 96(r1)
-; SOFT-NEXT: mr r30, r3
-; SOFT-NEXT: lhz r3, 0(r3)
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r29, r3
-; SOFT-NEXT: lhz r3, 2(r30)
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r28, r3
-; SOFT-NEXT: lhz r3, 4(r30)
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r27, r3
-; SOFT-NEXT: lhz r3, 6(r30)
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r6, r3
-; SOFT-NEXT: mr r3, r29
-; SOFT-NEXT: mr r4, r28
-; SOFT-NEXT: mr r5, r27
-; SOFT-NEXT: addi r1, r1, 80
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; SOFT-NEXT: mtlr r0
-; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; SOFT-NEXT: blr
- %a = load <4 x half>, ptr %p, align 8
- %b = fpext <4 x half> %a to <4 x float>
- ret <4 x float> %b
-}
-define <4 x double> @test_extend64_vec4(ptr %p) #0 {
-; P8-LABEL: test_extend64_vec4:
-; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: stdu r1, -112(r1)
-; P8-NEXT: li r4, 48
-; P8-NEXT: std r0, 128(r1)
-; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill
-; P8-NEXT: mr r30, r3
-; P8-NEXT: lhz r3, 6(r3)
-; P8-NEXT: stxvd2x vs61, r1, r4 # 16-byte Folded Spill
-; P8-NEXT: li r4, 64
-; P8-NEXT: stxvd2x vs62, r1, r4 # 16-byte Folded Spill
-; P8-NEXT: li r4, 80
-; P8-NEXT: stxvd2x vs63, r1, r4 # 16-byte Folded Spill
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: lhz r3, 4(r30)
-; P8-NEXT: xxlor vs63, f1, f1
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: lhz r3, 2(r30)
-; P8-NEXT: xxlor vs62, f1, f1
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: lhz r3, 0(r30)
-; P8-NEXT: xxlor vs61, f1, f1
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: li r3, 80
-; P8-NEXT: xxmrghd vs35, vs63, vs62
-; P8-NEXT: xxmrghd vs34, vs61, vs1
-; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
-; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload
-; P8-NEXT: li r3, 64
-; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload
-; P8-NEXT: li r3, 48
-; P8-NEXT: lxvd2x vs61, r1, r3 # 16-byte Folded Reload
-; P8-NEXT: addi r1, r1, 112
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: mtlr r0
-; P8-NEXT: blr
-;
-; CHECK-LABEL: test_extend64_vec4:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lhz r4, 6(r3)
-; CHECK-NEXT: lhz r5, 4(r3)
-; CHECK-NEXT: lhz r6, 2(r3)
-; CHECK-NEXT: lhz r3, 0(r3)
-; CHECK-NEXT: mtfprwz f0, r3
-; CHECK-NEXT: mtfprwz f1, r6
-; CHECK-NEXT: xscvhpdp f0, f0
-; CHECK-NEXT: xscvhpdp f1, f1
-; CHECK-NEXT: xxmrghd vs34, vs1, vs0
-; CHECK-NEXT: mtfprwz f0, r5
-; CHECK-NEXT: mtfprwz f1, r4
-; CHECK-NEXT: xscvhpdp f0, f0
-; CHECK-NEXT: xscvhpdp f1, f1
-; CHECK-NEXT: xxmrghd vs35, vs1, vs0
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: test_extend64_vec4:
-; SOFT: # %bb.0:
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT: stdu r1, -80(r1)
-; SOFT-NEXT: std r0, 96(r1)
-; SOFT-NEXT: mr r30, r3
-; SOFT-NEXT: lhz r3, 0(r3)
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __extendsfdf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r29, r3
-; SOFT-NEXT: lhz r3, 2(r30)
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __extendsfdf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r28, r3
-; SOFT-NEXT: lhz r3, 4(r30)
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __extendsfdf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r27, r3
-; SOFT-NEXT: lhz r3, 6(r30)
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __extendsfdf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r6, r3
-; SOFT-NEXT: mr r3, r29
-; SOFT-NEXT: mr r4, r28
-; SOFT-NEXT: mr r5, r27
-; SOFT-NEXT: addi r1, r1, 80
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; SOFT-NEXT: mtlr r0
-; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; SOFT-NEXT: blr
- %a = load <4 x half>, ptr %p, align 8
- %b = fpext <4 x half> %a to <4 x double>
- ret <4 x double> %b
-}
-define void @test_trunc32_vec4(<4 x float> %a, ptr %p) #0 {
-; P8-LABEL: test_trunc32_vec4:
-; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: stdu r1, -112(r1)
-; P8-NEXT: xxsldwi vs0, vs34, vs34, 3
-; P8-NEXT: li r3, 48
-; P8-NEXT: std r0, 128(r1)
-; P8-NEXT: std r27, 72(r1) # 8-byte Folded Spill
-; P8-NEXT: std r28, 80(r1) # 8-byte Folded Spill
-; P8-NEXT: std r29, 88(r1) # 8-byte Folded Spill
-; P8-NEXT: xscvspdpn f1, vs0
-; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill
-; P8-NEXT: stxvd2x vs63, r1, r3 # 16-byte Folded Spill
-; P8-NEXT: mr r30, r5
-; P8-NEXT: vmr v31, v2
-; P8-NEXT: bl __truncsfhf2
-; P8-NEXT: nop
-; P8-NEXT: xxswapd vs0, vs63
-; P8-NEXT: mr r29, r3
-; P8-NEXT: xscvspdpn f1, vs0
-; P8-NEXT: bl __truncsfhf2
-; P8-NEXT: nop
-; P8-NEXT: xxsldwi vs0, vs63, vs63, 1
-; P8-NEXT: mr r28, r3
-; P8-NEXT: xscvspdpn f1, vs0
-; P8-NEXT: bl __truncsfhf2
-; P8-NEXT: nop
-; P8-NEXT: xscvspdpn f1, vs63
-; P8-NEXT: mr r27, r3
-; P8-NEXT: bl __truncsfhf2
-; P8-NEXT: nop
-; P8-NEXT: sth r3, 6(r30)
-; P8-NEXT: li r3, 48
-; P8-NEXT: sth r27, 4(r30)
-; P8-NEXT: ld r27, 72(r1) # 8-byte Folded Reload
-; P8-NEXT: sth r28, 2(r30)
-; P8-NEXT: sth r29, 0(r30)
-; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
-; P8-NEXT: ld r29, 88(r1) # 8-byte Folded Reload
-; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload
-; P8-NEXT: ld r28, 80(r1) # 8-byte Folded Reload
-; P8-NEXT: addi r1, r1, 112
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: mtlr r0
-; P8-NEXT: blr
-;
-; CHECK-LABEL: test_trunc32_vec4:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3
-; CHECK-NEXT: xxsldwi vs1, vs34, vs34, 1
-; CHECK-NEXT: xscvspdpn f0, vs0
-; CHECK-NEXT: xscvspdpn f1, vs1
-; CHECK-NEXT: xscvdphp f0, f0
-; CHECK-NEXT: mffprwz r3, f0
-; CHECK-NEXT: xxswapd vs0, vs34
-; CHECK-NEXT: xscvspdpn f0, vs0
-; CHECK-NEXT: xscvdphp f0, f0
-; CHECK-NEXT: xscvdphp f1, f1
-; CHECK-NEXT: mffprwz r4, f1
-; CHECK-NEXT: xscvspdpn f1, vs34
-; CHECK-NEXT: xscvdphp f1, f1
-; CHECK-NEXT: sth r4, 4(r5)
-; CHECK-NEXT: mffprwz r4, f0
-; CHECK-NEXT: sth r3, 0(r5)
-; CHECK-NEXT: sth r4, 2(r5)
-; CHECK-NEXT: mffprwz r6, f1
-; CHECK-NEXT: sth r6, 6(r5)
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: test_trunc32_vec4:
-; SOFT: # %bb.0:
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT: stdu r1, -80(r1)
-; SOFT-NEXT: mr r27, r3
-; SOFT-NEXT: clrldi r3, r6, 32
-; SOFT-NEXT: std r0, 96(r1)
-; SOFT-NEXT: mr r30, r7
-; SOFT-NEXT: mr r29, r5
-; SOFT-NEXT: mr r28, r4
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r26, r3
-; SOFT-NEXT: clrldi r3, r29, 32
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r29, r3
-; SOFT-NEXT: clrldi r3, r28, 32
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r28, r3
-; SOFT-NEXT: clrldi r3, r27, 32
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r27, r3
-; SOFT-NEXT: clrldi r3, r28, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r28, r3
-; SOFT-NEXT: clrldi r3, r29, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r29, r3
-; SOFT-NEXT: clrldi r3, r26, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 6(r30)
-; SOFT-NEXT: mr r3, r29
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 4(r30)
-; SOFT-NEXT: mr r3, r28
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 2(r30)
-; SOFT-NEXT: mr r3, r27
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 0(r30)
-; SOFT-NEXT: addi r1, r1, 80
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; SOFT-NEXT: mtlr r0
-; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; SOFT-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
-; SOFT-NEXT: blr
- %v = fptrunc <4 x float> %a to <4 x half>
- store <4 x half> %v, ptr %p
- ret void
-}
-define void @test_trunc64_vec4(<4 x double> %a, ptr %p) #0 {
-; P8-LABEL: test_trunc64_vec4:
-; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: stdu r1, -128(r1)
-; P8-NEXT: li r3, 48
-; P8-NEXT: std r0, 144(r1)
-; P8-NEXT: xxswapd vs1, vs34
-; P8-NEXT: std r27, 88(r1) # 8-byte Folded Spill
-; P8-NEXT: std r28, 96(r1) # 8-byte Folded Spill
-; P8-NEXT: std r29, 104(r1) # 8-byte Folded Spill
-; P8-NEXT: std r30, 112(r1) # 8-byte Folded Spill
-; P8-NEXT: mr r30, r7
-; P8-NEXT: stxvd2x vs62, r1, r3 # 16-byte Folded Spill
-; P8-NEXT: li r3, 64
-; P8-NEXT: vmr v30, v2
-; P8-NEXT: stxvd2x vs63, r1, r3 # 16-byte Folded Spill
-; P8-NEXT: vmr v31, v3
-; P8-NEXT: bl __truncdfhf2
-; P8-NEXT: nop
-; P8-NEXT: xxswapd vs1, vs63
-; P8-NEXT: mr r29, r3
-; P8-NEXT: bl __truncdfhf2
-; P8-NEXT: nop
-; P8-NEXT: xxlor f1, vs62, vs62
-; P8-NEXT: mr r28, r3
-; P8-NEXT: bl __truncdfhf2
-; P8-NEXT: nop
-; P8-NEXT: xxlor f1, vs63, vs63
-; P8-NEXT: mr r27, r3
-; P8-NEXT: bl __truncdfhf2
-; P8-NEXT: nop
-; P8-NEXT: sth r3, 6(r30)
-; P8-NEXT: li r3, 64
-; P8-NEXT: sth r27, 2(r30)
-; P8-NEXT: ld r27, 88(r1) # 8-byte Folded Reload
-; P8-NEXT: sth r28, 4(r30)
-; P8-NEXT: sth r29, 0(r30)
-; P8-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
-; P8-NEXT: ld r29, 104(r1) # 8-byte Folded Reload
-; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload
-; P8-NEXT: li r3, 48
-; P8-NEXT: ld r28, 96(r1) # 8-byte Folded Reload
-; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload
-; P8-NEXT: addi r1, r1, 128
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: mtlr r0
-; P8-NEXT: blr
-;
-; CHECK-LABEL: test_trunc64_vec4:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xxswapd vs0, vs34
-; CHECK-NEXT: xscvdphp f0, f0
-; CHECK-NEXT: mffprwz r3, f0
-; CHECK-NEXT: xxswapd vs0, vs35
-; CHECK-NEXT: xscvdphp f0, f0
-; CHECK-NEXT: xscvdphp f1, vs34
-; CHECK-NEXT: mffprwz r4, f1
-; CHECK-NEXT: xscvdphp f1, vs35
-; CHECK-NEXT: sth r3, 0(r7)
-; CHECK-NEXT: sth r4, 2(r7)
-; CHECK-NEXT: mffprwz r4, f0
-; CHECK-NEXT: sth r4, 4(r7)
-; CHECK-NEXT: mffprwz r5, f1
-; CHECK-NEXT: sth r5, 6(r7)
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: test_trunc64_vec4:
-; SOFT: # %bb.0:
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT: stdu r1, -80(r1)
-; SOFT-NEXT: mr r27, r3
-; SOFT-NEXT: mr r3, r6
-; SOFT-NEXT: std r0, 96(r1)
-; SOFT-NEXT: mr r30, r7
-; SOFT-NEXT: mr r29, r5
-; SOFT-NEXT: mr r28, r4
-; SOFT-NEXT: bl __truncdfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r26, r3
-; SOFT-NEXT: mr r3, r29
-; SOFT-NEXT: bl __truncdfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r29, r3
-; SOFT-NEXT: mr r3, r28
-; SOFT-NEXT: bl __truncdfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r28, r3
-; SOFT-NEXT: mr r3, r27
-; SOFT-NEXT: bl __truncdfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r27, r3
-; SOFT-NEXT: clrldi r3, r28, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r28, r3
-; SOFT-NEXT: clrldi r3, r29, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r29, r3
-; SOFT-NEXT: clrldi r3, r26, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 6(r30)
-; SOFT-NEXT: mr r3, r29
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 4(r30)
-; SOFT-NEXT: mr r3, r28
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 2(r30)
-; SOFT-NEXT: mr r3, r27
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 0(r30)
-; SOFT-NEXT: addi r1, r1, 80
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; SOFT-NEXT: mtlr r0
-; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; SOFT-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
-; SOFT-NEXT: blr
- %v = fptrunc <4 x double> %a to <4 x half>
- store <4 x half> %v, ptr %p
- ret void
-}
-define float @test_sitofp_fadd_i32(i32 %a, ptr %b) #0 {
-; P8-LABEL: test_sitofp_fadd_i32:
-; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: std r30, -24(r1) # 8-byte Folded Spill
-; P8-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
-; P8-NEXT: stdu r1, -64(r1)
-; P8-NEXT: std r0, 80(r1)
-; P8-NEXT: mr r30, r3
-; P8-NEXT: lhz r3, 0(r4)
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: mtfprwa f0, r30
-; P8-NEXT: fmr f31, f1
-; P8-NEXT: xscvsxdsp f1, f0
-; P8-NEXT: bl __truncsfhf2
-; P8-NEXT: nop
-; P8-NEXT: clrldi r3, r3, 48
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: xsaddsp f1, f31, f1
-; P8-NEXT: addi r1, r1, 64
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
-; P8-NEXT: ld r30, -24(r1) # 8-byte Folded Reload
-; P8-NEXT: mtlr r0
-; P8-NEXT: blr
-;
-; CHECK-LABEL: test_sitofp_fadd_i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: mtfprwa f1, r3
-; CHECK-NEXT: lhz r4, 0(r4)
-; CHECK-NEXT: xscvsxdsp f1, f1
-; CHECK-NEXT: mtfprwz f0, r4
-; CHECK-NEXT: xscvhpdp f0, f0
-; CHECK-NEXT: xscvdphp f1, f1
-; CHECK-NEXT: mffprwz r3, f1
-; CHECK-NEXT: clrlwi r3, r3, 16
-; CHECK-NEXT: mtfprwz f1, r3
-; CHECK-NEXT: xscvhpdp f1, f1
-; CHECK-NEXT: xsaddsp f1, f0, f1
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: test_sitofp_fadd_i32:
-; SOFT: # %bb.0:
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT: stdu r1, -64(r1)
-; SOFT-NEXT: std r0, 80(r1)
-; SOFT-NEXT: mr r30, r3
-; SOFT-NEXT: lhz r3, 0(r4)
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r29, r3
-; SOFT-NEXT: extsw r3, r30
-; SOFT-NEXT: bl __floatsisf
-; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 32
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r4, r3
-; SOFT-NEXT: mr r3, r29
-; SOFT-NEXT: bl __addsf3
-; SOFT-NEXT: nop
-; SOFT-NEXT: addi r1, r1, 64
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; SOFT-NEXT: mtlr r0
-; SOFT-NEXT: blr
- %tmp0 = load half, ptr %b
- %tmp1 = sitofp i32 %a to half
- %tmp2 = fadd half %tmp0, %tmp1
- %tmp3 = fpext half %tmp2 to float
- ret float %tmp3
-}
-define half @PR40273(half) #0 {
-; P8-LABEL: PR40273:
-; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: stdu r1, -32(r1)
-; P8-NEXT: std r0, 48(r1)
-; P8-NEXT: bl __truncsfhf2
-; P8-NEXT: nop
-; P8-NEXT: clrldi r3, r3, 48
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: fmr f0, f1
-; P8-NEXT: xxlxor f1, f1, f1
-; P8-NEXT: fcmpu cr0, f0, f1
-; P8-NEXT: beq cr0, .LBB20_2
-; P8-NEXT: # %bb.1:
-; P8-NEXT: vspltisw v2, 1
-; P8-NEXT: xvcvsxwdp vs1, vs34
-; P8-NEXT: .LBB20_2:
-; P8-NEXT: addi r1, r1, 32
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: mtlr r0
-; P8-NEXT: blr
-;
-; CHECK-LABEL: PR40273:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xscvdphp f0, f1
-; CHECK-NEXT: xxlxor f1, f1, f1
-; CHECK-NEXT: mffprwz r3, f0
-; CHECK-NEXT: clrlwi r3, r3, 16
-; CHECK-NEXT: mtfprwz f0, r3
-; CHECK-NEXT: xscvhpdp f0, f0
-; CHECK-NEXT: fcmpu cr0, f0, f1
-; CHECK-NEXT: beqlr cr0
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: vspltisw v2, 1
-; CHECK-NEXT: xvcvsxwdp vs1, vs34
-; CHECK-NEXT: blr
-;
-; SOFT-LABEL: PR40273:
-; SOFT: # %bb.0:
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: stdu r1, -32(r1)
-; SOFT-NEXT: clrldi r3, r3, 48
-; SOFT-NEXT: std r0, 48(r1)
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: li r4, 0
-; SOFT-NEXT: bl __nesf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: cmplwi r3, 0
-; SOFT-NEXT: lis r3, 16256
-; SOFT-NEXT: iseleq r3, 0, r3
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: addi r1, r1, 32
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: mtlr r0
-; SOFT-NEXT: blr
- %2 = fcmp une half %0, 0xH0000
- %3 = uitofp i1 %2 to half
- ret half %3
-}
-attributes #0 = { nounwind }
>From a8d24bac8db42df06a55f705b5ce75134adb77db Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Wed, 6 Aug 2025 07:21:41 +0000
Subject: [PATCH 4/4] [PowerPC] Change `half` to use soft promotion rather than
`PromoteFloat`
On PowerPC targets, `half` uses the default legalization of promoting to
a `f32`. However, this has some fundamental issues related to inability
to round trip. Resolve this by switching to the soft legalization, which
passes `f16` as an `i16`.
The PowerPC ABI Specification does not define a `_Float16` type, so the
calling convention changes are acceptable.
Fixes the PowerPC portion of [1]. A similar change was done for MIPS in
f0231b6164fd ("[MIPS] Use softPromoteHalf legalization for fp16 rather
than PromoteFloat (#110199)") and for Loongarch in 13280d99aec5
("[loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on
loongarch (#107791)").
[1]: https://github.com/llvm/llvm-project/issues/97975
---
llvm/docs/ReleaseNotes.md | 2 +
llvm/lib/Target/PowerPC/PPCISelLowering.h | 2 +
llvm/test/CodeGen/Generic/half.ll | 6 +-
llvm/test/CodeGen/PowerPC/atomics.ll | 172 +-
llvm/test/CodeGen/PowerPC/f128-conv.ll | 13 +-
llvm/test/CodeGen/PowerPC/half.ll | 783 ++--
llvm/test/CodeGen/PowerPC/ldexp.ll | 6 +-
llvm/test/CodeGen/PowerPC/llvm.frexp.ll | 143 +-
llvm/test/CodeGen/PowerPC/llvm.modf.ll | 83 +-
llvm/test/CodeGen/PowerPC/pr48519.ll | 105 +-
llvm/test/CodeGen/PowerPC/pr49092.ll | 12 -
llvm/test/CodeGen/PowerPC/vector-llrint.ll | 3854 ++++++++-----------
llvm/test/CodeGen/PowerPC/vector-lrint.ll | 3880 ++++++++------------
13 files changed, 3370 insertions(+), 5691 deletions(-)
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index b38ed6270796d..d384b1f818408 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -102,6 +102,8 @@ Changes to the MIPS Backend
Changes to the PowerPC Backend
------------------------------
+* `half` now uses a soft float ABI, which works correctly in more cases.
+
Changes to the RISC-V Backend
-----------------------------
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 9755f0e272d16..fedeb41dc5d37 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -801,6 +801,8 @@ namespace llvm {
bool useSoftFloat() const override;
+ bool softPromoteHalfType() const override { return true; }
+
bool hasSPE() const;
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
diff --git a/llvm/test/CodeGen/Generic/half.ll b/llvm/test/CodeGen/Generic/half.ll
index f4ea5b5b30621..9249343cb67b0 100644
--- a/llvm/test/CodeGen/Generic/half.ll
+++ b/llvm/test/CodeGen/Generic/half.ll
@@ -30,9 +30,9 @@
; RUN: %if mips-registered-target %{ llc %s -o - -mtriple=mipsel-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %}
; RUN: %if msp430-registered-target %{ llc %s -o - -mtriple=msp430-none-elf | FileCheck %s --check-prefixes=ALL,CHECK %}
; RUN: %if nvptx-registered-target %{ llc %s -o - -mtriple=nvptx64-nvidia-cuda | FileCheck %s --check-prefixes=NOCRASH %}
-; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BAD %}
-; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BAD %}
-; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BAD %}
+; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %}
+; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %}
+; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %}
; RUN: %if riscv-registered-target %{ llc %s -o - -mtriple=riscv32-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %}
; RUN: %if riscv-registered-target %{ llc %s -o - -mtriple=riscv64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %}
; RUN: %if sparc-registered-target %{ llc %s -o - -mtriple=sparc-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BAD %}
diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll
index 40786057ead5f..1aaf0310879d7 100644
--- a/llvm/test/CodeGen/PowerPC/atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics.ll
@@ -138,67 +138,67 @@ define void @store_i64_seq_cst(ptr %mem) {
; Atomic CmpXchg
define i8 @cas_strong_i8_sc_sc(ptr %mem) {
; PPC32-LABEL: cas_strong_i8_sc_sc:
-; PPC32: # %bb.0:
+; PPC32: # %bb.0: # %cmpxchg.start
; PPC32-NEXT: rlwinm r5, r3, 0, 0, 29
; PPC32-NEXT: lwarx r4, 0, r5
-; PPC32-NEXT: not r3, r3
+; PPC32-NEXT: not r3, r3
; PPC32-NEXT: rlwinm r3, r3, 3, 27, 28
; PPC32-NEXT: srw r6, r4, r3
; PPC32-NEXT: andi. r6, r6, 255
-; PPC32-NEXT: bne cr0, .LBB8_4
-; PPC32-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC32-NEXT: bne cr0, .LBB8_4
+; PPC32-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC32-NEXT: li r6, 255
; PPC32-NEXT: li r7, 1
; PPC32-NEXT: slw r6, r6, r3
-; PPC32-NEXT: not r6, r6
+; PPC32-NEXT: not r6, r6
; PPC32-NEXT: slw r7, r7, r3
; PPC32-NEXT: sync
-; PPC32-NEXT: .LBB8_2: # %cmpxchg.trystore
-; PPC32-NEXT: # =>This Inner Loop Header: Depth=1
+; PPC32-NEXT: .LBB8_2: # %cmpxchg.trystore
+; PPC32-NEXT: #
; PPC32-NEXT: and r8, r4, r6
; PPC32-NEXT: or r8, r8, r7
; PPC32-NEXT: stwcx. r8, 0, r5
-; PPC32-NEXT: beq cr0, .LBB8_4
-; PPC32-NEXT: # %bb.3: # %cmpxchg.releasedload
-; PPC32-NEXT: # in Loop: Header=BB8_2 Depth=1
+; PPC32-NEXT: beq cr0, .LBB8_4
+; PPC32-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC32-NEXT: #
; PPC32-NEXT: lwarx r4, 0, r5
; PPC32-NEXT: srw r8, r4, r3
; PPC32-NEXT: andi. r8, r8, 255
-; PPC32-NEXT: beq cr0, .LBB8_2
-; PPC32-NEXT: .LBB8_4: # %cmpxchg.nostore
+; PPC32-NEXT: beq cr0, .LBB8_2
+; PPC32-NEXT: .LBB8_4: # %cmpxchg.nostore
; PPC32-NEXT: srw r3, r4, r3
; PPC32-NEXT: lwsync
; PPC32-NEXT: blr
;
; PPC64-LABEL: cas_strong_i8_sc_sc:
-; PPC64: # %bb.0:
+; PPC64: # %bb.0: # %cmpxchg.start
; PPC64-NEXT: rldicr r5, r3, 0, 61
-; PPC64-NEXT: not r3, r3
+; PPC64-NEXT: not r3, r3
; PPC64-NEXT: lwarx r4, 0, r5
; PPC64-NEXT: rlwinm r3, r3, 3, 27, 28
; PPC64-NEXT: srw r6, r4, r3
; PPC64-NEXT: andi. r6, r6, 255
-; PPC64-NEXT: bne cr0, .LBB8_4
-; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64-NEXT: bne cr0, .LBB8_4
+; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64-NEXT: li r6, 255
; PPC64-NEXT: li r7, 1
; PPC64-NEXT: slw r6, r6, r3
-; PPC64-NEXT: not r6, r6
+; PPC64-NEXT: not r6, r6
; PPC64-NEXT: slw r7, r7, r3
; PPC64-NEXT: sync
-; PPC64-NEXT: .LBB8_2: # %cmpxchg.trystore
-; PPC64-NEXT: # =>This Inner Loop Header: Depth=1
+; PPC64-NEXT: .LBB8_2: # %cmpxchg.trystore
+; PPC64-NEXT: #
; PPC64-NEXT: and r8, r4, r6
; PPC64-NEXT: or r8, r8, r7
; PPC64-NEXT: stwcx. r8, 0, r5
-; PPC64-NEXT: beq cr0, .LBB8_4
-; PPC64-NEXT: # %bb.3: # %cmpxchg.releasedload
-; PPC64-NEXT: # in Loop: Header=BB8_2 Depth=1
+; PPC64-NEXT: beq cr0, .LBB8_4
+; PPC64-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64-NEXT: #
; PPC64-NEXT: lwarx r4, 0, r5
; PPC64-NEXT: srw r8, r4, r3
; PPC64-NEXT: andi. r8, r8, 255
-; PPC64-NEXT: beq cr0, .LBB8_2
-; PPC64-NEXT: .LBB8_4: # %cmpxchg.nostore
+; PPC64-NEXT: beq cr0, .LBB8_2
+; PPC64-NEXT: .LBB8_4: # %cmpxchg.nostore
; PPC64-NEXT: srw r3, r4, r3
; PPC64-NEXT: lwsync
; PPC64-NEXT: blr
@@ -208,24 +208,24 @@ define i8 @cas_strong_i8_sc_sc(ptr %mem) {
}
define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
; PPC32-LABEL: cas_weak_i16_acquire_acquire:
-; PPC32: # %bb.0:
+; PPC32: # %bb.0: # %cmpxchg.start
; PPC32-NEXT: rlwinm r4, r3, 0, 0, 29
; PPC32-NEXT: lwarx r5, 0, r4
-; PPC32-NEXT: clrlwi r3, r3, 30
+; PPC32-NEXT: clrlwi r3, r3, 30
; PPC32-NEXT: xori r3, r3, 2
; PPC32-NEXT: slwi r6, r3, 3
; PPC32-NEXT: srw r3, r5, r6
; PPC32-NEXT: andi. r7, r3, 65535
-; PPC32-NEXT: beq cr0, .LBB9_2
-; PPC32-NEXT: # %bb.1: # %cmpxchg.failure
+; PPC32-NEXT: beq cr0, .LBB9_2
+; PPC32-NEXT: # %bb.1: # %cmpxchg.failure
; PPC32-NEXT: lwsync
; PPC32-NEXT: blr
-; PPC32-NEXT: .LBB9_2: # %cmpxchg.fencedstore
+; PPC32-NEXT: .LBB9_2: # %cmpxchg.fencedstore
; PPC32-NEXT: lis r7, 0
; PPC32-NEXT: ori r7, r7, 65535
; PPC32-NEXT: slw r7, r7, r6
; PPC32-NEXT: li r8, 1
-; PPC32-NEXT: not r7, r7
+; PPC32-NEXT: not r7, r7
; PPC32-NEXT: slw r6, r8, r6
; PPC32-NEXT: and r5, r5, r7
; PPC32-NEXT: or r5, r5, r6
@@ -234,24 +234,24 @@ define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
; PPC32-NEXT: blr
;
; PPC64-LABEL: cas_weak_i16_acquire_acquire:
-; PPC64: # %bb.0:
-; PPC64-NEXT: rldicr r4, r3, 0, 61
-; PPC64-NEXT: clrlwi r3, r3, 30
+; PPC64: # %bb.0: # %cmpxchg.start
+; PPC64-NEXT: rldicr r4, r3, 0, 61
+; PPC64-NEXT: clrlwi r3, r3, 30
; PPC64-NEXT: lwarx r5, 0, r4
; PPC64-NEXT: xori r3, r3, 2
; PPC64-NEXT: slwi r6, r3, 3
; PPC64-NEXT: srw r3, r5, r6
; PPC64-NEXT: andi. r7, r3, 65535
-; PPC64-NEXT: beq cr0, .LBB9_2
-; PPC64-NEXT: # %bb.1: # %cmpxchg.failure
+; PPC64-NEXT: beq cr0, .LBB9_2
+; PPC64-NEXT: # %bb.1: # %cmpxchg.failure
; PPC64-NEXT: lwsync
; PPC64-NEXT: blr
-; PPC64-NEXT: .LBB9_2: # %cmpxchg.fencedstore
+; PPC64-NEXT: .LBB9_2: # %cmpxchg.fencedstore
; PPC64-NEXT: lis r7, 0
; PPC64-NEXT: ori r7, r7, 65535
; PPC64-NEXT: slw r7, r7, r6
; PPC64-NEXT: li r8, 1
-; PPC64-NEXT: not r7, r7
+; PPC64-NEXT: not r7, r7
; PPC64-NEXT: slw r6, r8, r6
; PPC64-NEXT: and r5, r5, r7
; PPC64-NEXT: or r5, r5, r6
@@ -264,24 +264,24 @@ define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
}
define i32 @cas_strong_i32_acqrel_acquire(ptr %mem) {
; CHECK-LABEL: cas_strong_i32_acqrel_acquire:
-; CHECK: # %bb.0:
-; CHECK-NEXT: mr r4, r3
+; CHECK: # %bb.0: # %cmpxchg.start
+; CHECK-NEXT: mr r4, r3
; CHECK-NEXT: lwarx r3, 0, r3
-; CHECK-NEXT: cmplwi r3, 0
-; CHECK-NEXT: bne cr0, .LBB10_4
-; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; CHECK-NEXT: cmplwi r3, 0
+; CHECK-NEXT: bne cr0, .LBB10_4
+; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore
; CHECK-NEXT: li r5, 1
; CHECK-NEXT: lwsync
-; CHECK-NEXT: .LBB10_2: # %cmpxchg.trystore
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: .LBB10_2: # %cmpxchg.trystore
+; CHECK-NEXT: #
; CHECK-NEXT: stwcx. r5, 0, r4
-; CHECK-NEXT: beq cr0, .LBB10_4
-; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload
-; CHECK-NEXT: # in Loop: Header=BB10_2 Depth=1
+; CHECK-NEXT: beq cr0, .LBB10_4
+; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload
+; CHECK-NEXT: #
; CHECK-NEXT: lwarx r3, 0, r4
-; CHECK-NEXT: cmplwi r3, 0
-; CHECK-NEXT: beq cr0, .LBB10_2
-; CHECK-NEXT: .LBB10_4: # %cmpxchg.nostore
+; CHECK-NEXT: cmplwi r3, 0
+; CHECK-NEXT: beq cr0, .LBB10_2
+; CHECK-NEXT: .LBB10_4: # %cmpxchg.nostore
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%val = cmpxchg ptr %mem, i32 0, i32 1 acq_rel acquire
@@ -313,12 +313,12 @@ define i64 @cas_weak_i64_release_monotonic(ptr %mem) {
; PPC32-NEXT: blr
;
; PPC64-LABEL: cas_weak_i64_release_monotonic:
-; PPC64: # %bb.0:
-; PPC64-NEXT: mr r4, r3
+; PPC64: # %bb.0: # %cmpxchg.start
+; PPC64-NEXT: mr r4, r3
; PPC64-NEXT: ldarx r3, 0, r3
-; PPC64-NEXT: cmpldi r3, 0
-; PPC64-NEXT: bnelr cr0
-; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64-NEXT: cmpldi r3, 0
+; PPC64-NEXT: bnelr cr0
+; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64-NEXT: li r5, 1
; PPC64-NEXT: lwsync
; PPC64-NEXT: stdcx. r5, 0, r4
@@ -473,39 +473,20 @@ define i64 @and_i64_release(ptr %mem, i64 %operand) {
define half @load_atomic_f16__seq_cst(ptr %ptr) {
; PPC32-LABEL: load_atomic_f16__seq_cst:
; PPC32: # %bb.0:
-; PPC32-NEXT: mflr r0
-; PPC32-NEXT: stwu r1, -16(r1)
-; PPC32-NEXT: stw r0, 20(r1)
-; PPC32-NEXT: .cfi_def_cfa_offset 16
-; PPC32-NEXT: .cfi_offset lr, 4
; PPC32-NEXT: sync
; PPC32-NEXT: lhz r3, 0(r3)
; PPC32-NEXT: cmpw cr7, r3, r3
; PPC32-NEXT: bne- cr7, .+4
; PPC32-NEXT: isync
-; PPC32-NEXT: bl __extendhfsf2
-; PPC32-NEXT: lwz r0, 20(r1)
-; PPC32-NEXT: addi r1, r1, 16
-; PPC32-NEXT: mtlr r0
; PPC32-NEXT: blr
;
; PPC64-LABEL: load_atomic_f16__seq_cst:
; PPC64: # %bb.0:
-; PPC64-NEXT: mflr r0
-; PPC64-NEXT: stdu r1, -112(r1)
-; PPC64-NEXT: std r0, 128(r1)
-; PPC64-NEXT: .cfi_def_cfa_offset 112
-; PPC64-NEXT: .cfi_offset lr, 16
; PPC64-NEXT: sync
; PPC64-NEXT: lhz r3, 0(r3)
; PPC64-NEXT: cmpd cr7, r3, r3
; PPC64-NEXT: bne- cr7, .+4
; PPC64-NEXT: isync
-; PPC64-NEXT: bl __extendhfsf2
-; PPC64-NEXT: nop
-; PPC64-NEXT: addi r1, r1, 112
-; PPC64-NEXT: ld r0, 16(r1)
-; PPC64-NEXT: mtlr r0
; PPC64-NEXT: blr
%val = load atomic half, ptr %ptr seq_cst, align 2
ret half %val
@@ -579,44 +560,11 @@ define double @load_atomic_f64__seq_cst(ptr %ptr) {
}
define void @store_atomic_f16__seq_cst(ptr %ptr, half %val1) {
-; PPC32-LABEL: store_atomic_f16__seq_cst:
-; PPC32: # %bb.0:
-; PPC32-NEXT: mflr r0
-; PPC32-NEXT: stwu r1, -16(r1)
-; PPC32-NEXT: stw r0, 20(r1)
-; PPC32-NEXT: .cfi_def_cfa_offset 16
-; PPC32-NEXT: .cfi_offset lr, 4
-; PPC32-NEXT: .cfi_offset r30, -8
-; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill
-; PPC32-NEXT: mr r30, r3
-; PPC32-NEXT: bl __truncsfhf2
-; PPC32-NEXT: sync
-; PPC32-NEXT: sth r3, 0(r30)
-; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz r0, 20(r1)
-; PPC32-NEXT: addi r1, r1, 16
-; PPC32-NEXT: mtlr r0
-; PPC32-NEXT: blr
-;
-; PPC64-LABEL: store_atomic_f16__seq_cst:
-; PPC64: # %bb.0:
-; PPC64-NEXT: mflr r0
-; PPC64-NEXT: stdu r1, -128(r1)
-; PPC64-NEXT: std r0, 144(r1)
-; PPC64-NEXT: .cfi_def_cfa_offset 128
-; PPC64-NEXT: .cfi_offset lr, 16
-; PPC64-NEXT: .cfi_offset r30, -16
-; PPC64-NEXT: std r30, 112(r1) # 8-byte Folded Spill
-; PPC64-NEXT: mr r30, r3
-; PPC64-NEXT: bl __truncsfhf2
-; PPC64-NEXT: nop
-; PPC64-NEXT: sync
-; PPC64-NEXT: sth r3, 0(r30)
-; PPC64-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
-; PPC64-NEXT: addi r1, r1, 128
-; PPC64-NEXT: ld r0, 16(r1)
-; PPC64-NEXT: mtlr r0
-; PPC64-NEXT: blr
+; CHECK-LABEL: store_atomic_f16__seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sync
+; CHECK-NEXT: sth r4, 0(r3)
+; CHECK-NEXT: blr
store atomic half %val1, ptr %ptr seq_cst, align 2
ret void
}
diff --git a/llvm/test/CodeGen/PowerPC/f128-conv.ll b/llvm/test/CodeGen/PowerPC/f128-conv.ll
index f8b2861156db4..080843217e8c9 100644
--- a/llvm/test/CodeGen/PowerPC/f128-conv.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-conv.ll
@@ -1349,9 +1349,6 @@ define half @trunc(fp128 %a) nounwind {
; CHECK-NEXT: std r0, 48(r1)
; CHECK-NEXT: bl __trunckfhf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrlwi r3, r3, 16
-; CHECK-NEXT: mtfprwz f0, r3
-; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -1364,9 +1361,6 @@ define half @trunc(fp128 %a) nounwind {
; CHECK-P8-NEXT: std r0, 48(r1)
; CHECK-P8-NEXT: bl __trunckfhf2
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: clrldi r3, r3, 48
-; CHECK-P8-NEXT: bl __extendhfsf2
-; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
@@ -1379,7 +1373,9 @@ entry:
define fp128 @ext(half %a) nounwind {
; CHECK-LABEL: ext:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xscpsgndp v2, f1, f1
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: xscvhpdp v2, f0
; CHECK-NEXT: xscvdpqp v2, v2
; CHECK-NEXT: blr
;
@@ -1387,7 +1383,10 @@ define fp128 @ext(half %a) nounwind {
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: stdu r1, -32(r1)
+; CHECK-P8-NEXT: clrldi r3, r3, 48
; CHECK-P8-NEXT: std r0, 48(r1)
+; CHECK-P8-NEXT: bl __extendhfsf2
+; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: bl __extendsfkf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
diff --git a/llvm/test/CodeGen/PowerPC/half.ll b/llvm/test/CodeGen/PowerPC/half.ll
index fe0dccf63af80..3cd4f8b5ff9b8 100644
--- a/llvm/test/CodeGen/PowerPC/half.ll
+++ b/llvm/test/CodeGen/PowerPC/half.ll
@@ -20,39 +20,17 @@
define void @store(half %x, ptr %p) nounwind {
; PPC32-LABEL: store:
; PPC32: # %bb.0:
-; PPC32-NEXT: mflr r0
-; PPC32-NEXT: stwu r1, -16(r1)
-; PPC32-NEXT: stw r0, 20(r1)
-; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill
-; PPC32-NEXT: mr r30, r3
-; PPC32-NEXT: bl __truncsfhf2
-; PPC32-NEXT: sth r3, 0(r30)
-; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz r0, 20(r1)
-; PPC32-NEXT: addi r1, r1, 16
-; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: sth r3, 0(r4)
; PPC32-NEXT: blr
;
; P8-LABEL: store:
; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT: stdu r1, -48(r1)
-; P8-NEXT: std r0, 64(r1)
-; P8-NEXT: mr r30, r4
-; P8-NEXT: bl __truncsfhf2
-; P8-NEXT: nop
-; P8-NEXT: sth r3, 0(r30)
-; P8-NEXT: addi r1, r1, 48
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; P8-NEXT: mtlr r0
+; P8-NEXT: sth r3, 0(r4)
; P8-NEXT: blr
;
; CHECK-LABEL: store:
; CHECK: # %bb.0:
-; CHECK-NEXT: xscvdphp f0, f1
-; CHECK-NEXT: stxsihx f0, 0, r4
+; CHECK-NEXT: sth r3, 0(r4)
; CHECK-NEXT: blr
;
; SOFT-LABEL: store:
@@ -62,18 +40,7 @@ define void @store(half %x, ptr %p) nounwind {
;
; BE-LABEL: store:
; BE: # %bb.0:
-; BE-NEXT: mflr r0
-; BE-NEXT: stdu r1, -128(r1)
-; BE-NEXT: std r0, 144(r1)
-; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill
-; BE-NEXT: mr r30, r4
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: sth r3, 0(r30)
-; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
-; BE-NEXT: addi r1, r1, 128
-; BE-NEXT: ld r0, 16(r1)
-; BE-NEXT: mtlr r0
+; BE-NEXT: sth r3, 0(r4)
; BE-NEXT: blr
store half %x, ptr %p
ret void
@@ -82,33 +49,17 @@ define void @store(half %x, ptr %p) nounwind {
define half @return(ptr %p) nounwind {
; PPC32-LABEL: return:
; PPC32: # %bb.0:
-; PPC32-NEXT: mflr r0
-; PPC32-NEXT: stwu r1, -16(r1)
-; PPC32-NEXT: stw r0, 20(r1)
; PPC32-NEXT: lhz r3, 0(r3)
-; PPC32-NEXT: bl __extendhfsf2
-; PPC32-NEXT: lwz r0, 20(r1)
-; PPC32-NEXT: addi r1, r1, 16
-; PPC32-NEXT: mtlr r0
; PPC32-NEXT: blr
;
; P8-LABEL: return:
; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: stdu r1, -32(r1)
-; P8-NEXT: std r0, 48(r1)
; P8-NEXT: lhz r3, 0(r3)
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: addi r1, r1, 32
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: mtlr r0
; P8-NEXT: blr
;
; CHECK-LABEL: return:
; CHECK: # %bb.0:
-; CHECK-NEXT: lxsihzx f0, 0, r3
-; CHECK-NEXT: xscvhpdp f1, f0
+; CHECK-NEXT: lhz r3, 0(r3)
; CHECK-NEXT: blr
;
; SOFT-LABEL: return:
@@ -118,15 +69,7 @@ define half @return(ptr %p) nounwind {
;
; BE-LABEL: return:
; BE: # %bb.0:
-; BE-NEXT: mflr r0
-; BE-NEXT: stdu r1, -112(r1)
-; BE-NEXT: std r0, 128(r1)
; BE-NEXT: lhz r3, 0(r3)
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: addi r1, r1, 112
-; BE-NEXT: ld r0, 16(r1)
-; BE-NEXT: mtlr r0
; BE-NEXT: blr
%r = load half, ptr %p
ret half %r
@@ -316,11 +259,6 @@ define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr no
; SOFT-NEXT: std r0, 64(r1)
; SOFT-NEXT: bl __truncdfhf2
; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
; SOFT-NEXT: sth r3, 0(r30)
; SOFT-NEXT: addi r1, r1, 48
; SOFT-NEXT: ld r0, 16(r1)
@@ -399,11 +337,6 @@ define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr nou
; SOFT-NEXT: std r0, 64(r1)
; SOFT-NEXT: bl __truncsfhf2
; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
; SOFT-NEXT: sth r3, 0(r30)
; SOFT-NEXT: addi r1, r1, 48
; SOFT-NEXT: ld r0, 16(r1)
@@ -454,21 +387,8 @@ define void @test_load_store(ptr %in, ptr %out) nounwind {
;
; SOFT-LABEL: test_load_store:
; SOFT: # %bb.0:
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT: stdu r1, -48(r1)
-; SOFT-NEXT: std r0, 64(r1)
-; SOFT-NEXT: mr r30, r4
; SOFT-NEXT: lhz r3, 0(r3)
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 0(r30)
-; SOFT-NEXT: addi r1, r1, 48
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: sth r3, 0(r4)
; SOFT-NEXT: blr
;
; BE-LABEL: test_load_store:
@@ -544,34 +464,14 @@ define void @test_bitcast_to_half(ptr %addr, i16 %in) nounwind {
define half @from_bits(i16 %x) nounwind {
; PPC32-LABEL: from_bits:
; PPC32: # %bb.0:
-; PPC32-NEXT: mflr r0
-; PPC32-NEXT: stwu r1, -16(r1)
-; PPC32-NEXT: clrlwi r3, r3, 16
-; PPC32-NEXT: stw r0, 20(r1)
-; PPC32-NEXT: bl __extendhfsf2
-; PPC32-NEXT: lwz r0, 20(r1)
-; PPC32-NEXT: addi r1, r1, 16
-; PPC32-NEXT: mtlr r0
; PPC32-NEXT: blr
;
; P8-LABEL: from_bits:
; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: stdu r1, -32(r1)
-; P8-NEXT: clrldi r3, r3, 48
-; P8-NEXT: std r0, 48(r1)
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: addi r1, r1, 32
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: mtlr r0
; P8-NEXT: blr
;
; CHECK-LABEL: from_bits:
; CHECK: # %bb.0:
-; CHECK-NEXT: clrlwi r3, r3, 16
-; CHECK-NEXT: mtfprwz f0, r3
-; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: blr
;
; SOFT-LABEL: from_bits:
@@ -580,15 +480,6 @@ define half @from_bits(i16 %x) nounwind {
;
; BE-LABEL: from_bits:
; BE: # %bb.0:
-; BE-NEXT: mflr r0
-; BE-NEXT: stdu r1, -112(r1)
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: std r0, 128(r1)
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: addi r1, r1, 112
-; BE-NEXT: ld r0, 16(r1)
-; BE-NEXT: mtlr r0
; BE-NEXT: blr
%res = bitcast i16 %x to half
ret half %res
@@ -597,34 +488,14 @@ define half @from_bits(i16 %x) nounwind {
define i16 @to_bits(half %x) nounwind {
; PPC32-LABEL: to_bits:
; PPC32: # %bb.0:
-; PPC32-NEXT: mflr r0
-; PPC32-NEXT: stwu r1, -16(r1)
-; PPC32-NEXT: stw r0, 20(r1)
-; PPC32-NEXT: bl __truncsfhf2
-; PPC32-NEXT: clrlwi r3, r3, 16
-; PPC32-NEXT: lwz r0, 20(r1)
-; PPC32-NEXT: addi r1, r1, 16
-; PPC32-NEXT: mtlr r0
; PPC32-NEXT: blr
;
; P8-LABEL: to_bits:
; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: stdu r1, -32(r1)
-; P8-NEXT: std r0, 48(r1)
-; P8-NEXT: bl __truncsfhf2
-; P8-NEXT: nop
-; P8-NEXT: clrldi r3, r3, 48
-; P8-NEXT: addi r1, r1, 32
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: mtlr r0
; P8-NEXT: blr
;
; CHECK-LABEL: to_bits:
; CHECK: # %bb.0:
-; CHECK-NEXT: xscvdphp f0, f1
-; CHECK-NEXT: mffprwz r3, f0
-; CHECK-NEXT: clrlwi r3, r3, 16
; CHECK-NEXT: blr
;
; SOFT-LABEL: to_bits:
@@ -633,15 +504,6 @@ define i16 @to_bits(half %x) nounwind {
;
; BE-LABEL: to_bits:
; BE: # %bb.0:
-; BE-NEXT: mflr r0
-; BE-NEXT: stdu r1, -112(r1)
-; BE-NEXT: std r0, 128(r1)
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: addi r1, r1, 112
-; BE-NEXT: ld r0, 16(r1)
-; BE-NEXT: mtlr r0
; BE-NEXT: blr
%res = bitcast half %x to i16
ret i16 %res
@@ -819,11 +681,6 @@ define void @test_trunc32(float %in, ptr %addr) nounwind {
; SOFT-NEXT: mr r30, r4
; SOFT-NEXT: bl __truncsfhf2
; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
; SOFT-NEXT: sth r3, 0(r30)
; SOFT-NEXT: addi r1, r1, 48
; SOFT-NEXT: ld r0, 16(r1)
@@ -897,11 +754,6 @@ define void @test_trunc64(double %in, ptr %addr) nounwind {
; SOFT-NEXT: mr r30, r4
; SOFT-NEXT: bl __truncdfhf2
; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
; SOFT-NEXT: sth r3, 0(r30)
; SOFT-NEXT: addi r1, r1, 48
; SOFT-NEXT: ld r0, 16(r1)
@@ -1056,11 +908,6 @@ define void @test_sitofp_i64(i64 %a, ptr %p) nounwind {
; SOFT-NEXT: clrldi r3, r3, 32
; SOFT-NEXT: bl __truncsfhf2
; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
; SOFT-NEXT: sth r3, 0(r30)
; SOFT-NEXT: addi r1, r1, 48
; SOFT-NEXT: ld r0, 16(r1)
@@ -1243,11 +1090,6 @@ define void @test_uitofp_i64(i64 %a, ptr %p) nounwind {
; SOFT-NEXT: nop
; SOFT-NEXT: bl __truncsfhf2
; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
; SOFT-NEXT: sth r3, 0(r30)
; SOFT-NEXT: addi r1, r1, 48
; SOFT-NEXT: ld r0, 16(r1)
@@ -1354,67 +1196,89 @@ define <4 x float> @test_extend32_vec4(ptr %p) nounwind {
; P8-LABEL: test_extend32_vec4:
; P8: # %bb.0:
; P8-NEXT: mflr r0
-; P8-NEXT: stdu r1, -112(r1)
-; P8-NEXT: li r4, 48
-; P8-NEXT: std r0, 128(r1)
-; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill
-; P8-NEXT: mr r30, r3
-; P8-NEXT: lhz r3, 6(r3)
-; P8-NEXT: stxvd2x vs61, r1, r4 # 16-byte Folded Spill
-; P8-NEXT: li r4, 64
-; P8-NEXT: stxvd2x vs62, r1, r4 # 16-byte Folded Spill
+; P8-NEXT: stdu r1, -144(r1)
; P8-NEXT: li r4, 80
+; P8-NEXT: std r0, 160(r1)
+; P8-NEXT: std r29, 120(r1) # 8-byte Folded Spill
+; P8-NEXT: std r30, 128(r1) # 8-byte Folded Spill
+; P8-NEXT: stxvd2x vs62, r1, r4 # 16-byte Folded Spill
+; P8-NEXT: li r4, 96
; P8-NEXT: stxvd2x vs63, r1, r4 # 16-byte Folded Spill
+; P8-NEXT: lwz r4, 4(r3)
+; P8-NEXT: stw r4, 64(r1)
+; P8-NEXT: lwz r3, 0(r3)
+; P8-NEXT: stw r3, 48(r1)
+; P8-NEXT: addi r3, r1, 64
+; P8-NEXT: lxvd2x vs62, 0, r3
+; P8-NEXT: addi r3, r1, 48
+; P8-NEXT: lxvd2x vs0, 0, r3
+; P8-NEXT: mffprd r30, f0
+; P8-NEXT: clrldi r3, r30, 48
+; P8-NEXT: clrlwi r3, r3, 16
; P8-NEXT: bl __extendhfsf2
; P8-NEXT: nop
-; P8-NEXT: lhz r3, 2(r30)
+; P8-NEXT: mfvsrd r29, vs62
; P8-NEXT: xxlor vs63, f1, f1
+; P8-NEXT: clrldi r3, r29, 48
+; P8-NEXT: clrlwi r3, r3, 16
; P8-NEXT: bl __extendhfsf2
; P8-NEXT: nop
-; P8-NEXT: lhz r3, 4(r30)
-; P8-NEXT: xxlor vs62, f1, f1
+; P8-NEXT: rldicl r3, r30, 48, 48
+; P8-NEXT: xxmrghd vs0, vs1, vs63
+; P8-NEXT: clrlwi r3, r3, 16
+; P8-NEXT: xvcvdpsp vs62, vs0
; P8-NEXT: bl __extendhfsf2
; P8-NEXT: nop
-; P8-NEXT: lhz r3, 0(r30)
-; P8-NEXT: xxlor vs61, f1, f1
+; P8-NEXT: rldicl r3, r29, 48, 48
+; P8-NEXT: xxlor vs63, f1, f1
+; P8-NEXT: clrlwi r3, r3, 16
; P8-NEXT: bl __extendhfsf2
; P8-NEXT: nop
-; P8-NEXT: li r3, 80
-; P8-NEXT: xxmrghd vs0, vs61, vs1
-; P8-NEXT: xxmrghd vs1, vs63, vs62
-; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
-; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload
-; P8-NEXT: li r3, 64
+; P8-NEXT: xxmrghd vs0, vs1, vs63
+; P8-NEXT: li r3, 96
+; P8-NEXT: ld r30, 128(r1) # 8-byte Folded Reload
+; P8-NEXT: ld r29, 120(r1) # 8-byte Folded Reload
; P8-NEXT: xvcvdpsp vs34, vs0
-; P8-NEXT: xvcvdpsp vs35, vs1
+; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload
+; P8-NEXT: li r3, 80
+; P8-NEXT: vmrgew v2, v2, v30
; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload
-; P8-NEXT: li r3, 48
-; P8-NEXT: lxvd2x vs61, r1, r3 # 16-byte Folded Reload
-; P8-NEXT: vmrgew v2, v3, v2
-; P8-NEXT: addi r1, r1, 112
+; P8-NEXT: addi r1, r1, 144
; P8-NEXT: ld r0, 16(r1)
; P8-NEXT: mtlr r0
; P8-NEXT: blr
;
; CHECK-LABEL: test_extend32_vec4:
; CHECK: # %bb.0:
-; CHECK-NEXT: lhz r4, 6(r3)
+; CHECK-NEXT: lwz r4, 4(r3)
+; CHECK-NEXT: stw r4, -16(r1)
+; CHECK-NEXT: lwz r3, 0(r3)
+; CHECK-NEXT: lxv vs34, -16(r1)
+; CHECK-NEXT: stw r3, -32(r1)
+; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: lxv vs35, -32(r1)
+; CHECK-NEXT: vextuhrx r4, r3, v3
+; CHECK-NEXT: vextuhrx r3, r3, v2
+; CHECK-NEXT: clrlwi r4, r4, 16
+; CHECK-NEXT: clrlwi r3, r3, 16
; CHECK-NEXT: mtfprwz f0, r4
+; CHECK-NEXT: mtfprwz f1, r3
+; CHECK-NEXT: li r3, 2
; CHECK-NEXT: xscvhpdp f0, f0
-; CHECK-NEXT: lhz r4, 2(r3)
-; CHECK-NEXT: mtfprwz f1, r4
; CHECK-NEXT: xscvhpdp f1, f1
-; CHECK-NEXT: lhz r4, 4(r3)
-; CHECK-NEXT: mtfprwz f2, r4
-; CHECK-NEXT: xscvhpdp f2, f2
-; CHECK-NEXT: lhz r3, 0(r3)
-; CHECK-NEXT: xxmrghd vs0, vs0, vs1
-; CHECK-NEXT: mtfprwz f3, r3
-; CHECK-NEXT: xvcvdpsp vs35, vs0
-; CHECK-NEXT: xscvhpdp f3, f3
-; CHECK-NEXT: xxmrghd vs2, vs2, vs3
-; CHECK-NEXT: xvcvdpsp vs34, vs2
-; CHECK-NEXT: vmrgew v2, v3, v2
+; CHECK-NEXT: vextuhrx r4, r3, v3
+; CHECK-NEXT: vextuhrx r3, r3, v2
+; CHECK-NEXT: clrlwi r4, r4, 16
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: xxmrghd vs0, vs1, vs0
+; CHECK-NEXT: mtfprwz f1, r3
+; CHECK-NEXT: xvcvdpsp vs36, vs0
+; CHECK-NEXT: mtfprwz f0, r4
+; CHECK-NEXT: xscvhpdp f0, f0
+; CHECK-NEXT: xscvhpdp f1, f1
+; CHECK-NEXT: xxmrghd vs0, vs1, vs0
+; CHECK-NEXT: xvcvdpsp vs34, vs0
+; CHECK-NEXT: vmrgew v2, v2, v4
; CHECK-NEXT: blr
;
; SOFT-LABEL: test_extend32_vec4:
@@ -1458,39 +1322,39 @@ define <4 x float> @test_extend32_vec4(ptr %p) nounwind {
; BE-LABEL: test_extend32_vec4:
; BE: # %bb.0:
; BE-NEXT: mflr r0
-; BE-NEXT: stdu r1, -176(r1)
-; BE-NEXT: std r0, 192(r1)
-; BE-NEXT: std r30, 136(r1) # 8-byte Folded Spill
-; BE-NEXT: mr r30, r3
-; BE-NEXT: lhz r3, 0(r3)
-; BE-NEXT: stfd f29, 152(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f30, 160(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f31, 168(r1) # 8-byte Folded Spill
+; BE-NEXT: stdu r1, -192(r1)
+; BE-NEXT: std r0, 208(r1)
+; BE-NEXT: lwz r4, 0(r3)
+; BE-NEXT: stw r4, 160(r1)
+; BE-NEXT: lwz r3, 4(r3)
+; BE-NEXT: stw r3, 176(r1)
+; BE-NEXT: addi r3, r1, 160
+; BE-NEXT: lvx v2, 0, r3
+; BE-NEXT: addi r3, r1, 176
+; BE-NEXT: lvx v3, 0, r3
+; BE-NEXT: addi r3, r1, 128
+; BE-NEXT: stvx v3, 0, r3
+; BE-NEXT: addi r3, r1, 112
+; BE-NEXT: stvx v2, 0, r3
+; BE-NEXT: lhz r3, 130(r1)
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lhz r3, 2(r30)
-; BE-NEXT: fmr f31, f1
+; BE-NEXT: lhz r3, 128(r1)
+; BE-NEXT: stfs f1, 156(r1)
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lhz r3, 4(r30)
-; BE-NEXT: fmr f30, f1
+; BE-NEXT: lhz r3, 114(r1)
+; BE-NEXT: stfs f1, 152(r1)
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lhz r3, 6(r30)
-; BE-NEXT: fmr f29, f1
+; BE-NEXT: lhz r3, 112(r1)
+; BE-NEXT: stfs f1, 148(r1)
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: stfs f29, 120(r1)
-; BE-NEXT: addi r3, r1, 112
-; BE-NEXT: stfs f30, 116(r1)
-; BE-NEXT: stfs f31, 112(r1)
-; BE-NEXT: stfs f1, 124(r1)
+; BE-NEXT: stfs f1, 144(r1)
+; BE-NEXT: addi r3, r1, 144
; BE-NEXT: lvx v2, 0, r3
-; BE-NEXT: lfd f31, 168(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f30, 160(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f29, 152(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r30, 136(r1) # 8-byte Folded Reload
-; BE-NEXT: addi r1, r1, 176
+; BE-NEXT: addi r1, r1, 192
; BE-NEXT: ld r0, 16(r1)
; BE-NEXT: mtlr r0
; BE-NEXT: blr
@@ -1537,39 +1401,41 @@ define <4 x double> @test_extend64_vec4(ptr %p) nounwind {
; P8: # %bb.0:
; P8-NEXT: mflr r0
; P8-NEXT: stdu r1, -112(r1)
-; P8-NEXT: li r4, 48
; P8-NEXT: std r0, 128(r1)
+; P8-NEXT: li r4, 48
+; P8-NEXT: std r28, 80(r1) # 8-byte Folded Spill
+; P8-NEXT: lhz r28, 2(r3)
+; P8-NEXT: std r29, 88(r1) # 8-byte Folded Spill
; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill
-; P8-NEXT: mr r30, r3
-; P8-NEXT: lhz r3, 6(r3)
-; P8-NEXT: stxvd2x vs61, r1, r4 # 16-byte Folded Spill
-; P8-NEXT: li r4, 64
+; P8-NEXT: lhz r30, 6(r3)
+; P8-NEXT: lhz r29, 4(r3)
+; P8-NEXT: lhz r3, 0(r3)
; P8-NEXT: stxvd2x vs62, r1, r4 # 16-byte Folded Spill
-; P8-NEXT: li r4, 80
+; P8-NEXT: li r4, 64
; P8-NEXT: stxvd2x vs63, r1, r4 # 16-byte Folded Spill
; P8-NEXT: bl __extendhfsf2
; P8-NEXT: nop
-; P8-NEXT: lhz r3, 4(r30)
+; P8-NEXT: mr r3, r28
; P8-NEXT: xxlor vs63, f1, f1
; P8-NEXT: bl __extendhfsf2
; P8-NEXT: nop
-; P8-NEXT: lhz r3, 2(r30)
-; P8-NEXT: xxlor vs62, f1, f1
+; P8-NEXT: mr r3, r29
+; P8-NEXT: xxmrghd vs63, vs1, vs63
; P8-NEXT: bl __extendhfsf2
; P8-NEXT: nop
-; P8-NEXT: lhz r3, 0(r30)
-; P8-NEXT: xxlor vs61, f1, f1
+; P8-NEXT: mr r3, r30
+; P8-NEXT: xxlor vs62, f1, f1
; P8-NEXT: bl __extendhfsf2
; P8-NEXT: nop
-; P8-NEXT: li r3, 80
-; P8-NEXT: xxmrghd vs35, vs63, vs62
-; P8-NEXT: xxmrghd vs34, vs61, vs1
+; P8-NEXT: li r3, 64
+; P8-NEXT: vmr v2, v31
+; P8-NEXT: xxmrghd vs35, vs1, vs62
; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
+; P8-NEXT: ld r29, 88(r1) # 8-byte Folded Reload
+; P8-NEXT: ld r28, 80(r1) # 8-byte Folded Reload
; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload
-; P8-NEXT: li r3, 64
-; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload
; P8-NEXT: li r3, 48
-; P8-NEXT: lxvd2x vs61, r1, r3 # 16-byte Folded Reload
+; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload
; P8-NEXT: addi r1, r1, 112
; P8-NEXT: ld r0, 16(r1)
; P8-NEXT: mtlr r0
@@ -1642,36 +1508,43 @@ define <4 x double> @test_extend64_vec4(ptr %p) nounwind {
; BE-LABEL: test_extend64_vec4:
; BE: # %bb.0:
; BE-NEXT: mflr r0
-; BE-NEXT: stdu r1, -160(r1)
-; BE-NEXT: std r0, 176(r1)
-; BE-NEXT: std r30, 120(r1) # 8-byte Folded Spill
-; BE-NEXT: mr r30, r3
-; BE-NEXT: lhz r3, 6(r3)
-; BE-NEXT: stfd f29, 136(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f30, 144(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f31, 152(r1) # 8-byte Folded Spill
+; BE-NEXT: stdu r1, -176(r1)
+; BE-NEXT: std r0, 192(r1)
+; BE-NEXT: std r28, 120(r1) # 8-byte Folded Spill
+; BE-NEXT: std r29, 128(r1) # 8-byte Folded Spill
+; BE-NEXT: std r30, 136(r1) # 8-byte Folded Spill
+; BE-NEXT: lhz r30, 6(r3)
+; BE-NEXT: lhz r29, 4(r3)
+; BE-NEXT: lhz r28, 2(r3)
+; BE-NEXT: lhz r3, 0(r3)
+; BE-NEXT: stfd f29, 152(r1) # 8-byte Folded Spill
+; BE-NEXT: stfd f30, 160(r1) # 8-byte Folded Spill
+; BE-NEXT: stfd f31, 168(r1) # 8-byte Folded Spill
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lhz r3, 4(r30)
+; BE-NEXT: mr r3, r28
; BE-NEXT: fmr f31, f1
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lhz r3, 2(r30)
+; BE-NEXT: mr r3, r29
; BE-NEXT: fmr f30, f1
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lhz r3, 0(r30)
+; BE-NEXT: mr r3, r30
; BE-NEXT: fmr f29, f1
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: fmr f2, f29
-; BE-NEXT: fmr f3, f30
-; BE-NEXT: lfd f30, 144(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f29, 136(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f4, f31
-; BE-NEXT: lfd f31, 152(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r30, 120(r1) # 8-byte Folded Reload
-; BE-NEXT: addi r1, r1, 160
+; BE-NEXT: fmr f4, f1
+; BE-NEXT: fmr f1, f31
+; BE-NEXT: lfd f31, 168(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r30, 136(r1) # 8-byte Folded Reload
+; BE-NEXT: fmr f2, f30
+; BE-NEXT: fmr f3, f29
+; BE-NEXT: lfd f30, 160(r1) # 8-byte Folded Reload
+; BE-NEXT: lfd f29, 152(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r29, 128(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r28, 120(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 176
; BE-NEXT: ld r0, 16(r1)
; BE-NEXT: mtlr r0
; BE-NEXT: blr
@@ -1800,56 +1673,30 @@ define void @test_trunc32_vec4(<4 x float> %a, ptr %p) nounwind {
; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; SOFT-NEXT: stdu r1, -80(r1)
-; SOFT-NEXT: mr r27, r3
-; SOFT-NEXT: clrldi r3, r6, 32
+; SOFT-NEXT: clrldi r3, r3, 32
; SOFT-NEXT: std r0, 96(r1)
; SOFT-NEXT: mr r30, r7
-; SOFT-NEXT: mr r29, r5
-; SOFT-NEXT: mr r28, r4
+; SOFT-NEXT: mr r29, r6
+; SOFT-NEXT: mr r28, r5
+; SOFT-NEXT: mr r27, r4
; SOFT-NEXT: bl __truncsfhf2
; SOFT-NEXT: nop
; SOFT-NEXT: mr r26, r3
-; SOFT-NEXT: clrldi r3, r29, 32
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r29, r3
-; SOFT-NEXT: clrldi r3, r28, 32
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r28, r3
; SOFT-NEXT: clrldi r3, r27, 32
; SOFT-NEXT: bl __truncsfhf2
; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
; SOFT-NEXT: mr r27, r3
-; SOFT-NEXT: clrldi r3, r28, 48
-; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: clrldi r3, r28, 32
+; SOFT-NEXT: bl __truncsfhf2
; SOFT-NEXT: nop
; SOFT-NEXT: mr r28, r3
-; SOFT-NEXT: clrldi r3, r29, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r29, r3
-; SOFT-NEXT: clrldi r3, r26, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
+; SOFT-NEXT: clrldi r3, r29, 32
; SOFT-NEXT: bl __truncsfhf2
; SOFT-NEXT: nop
; SOFT-NEXT: sth r3, 6(r30)
-; SOFT-NEXT: mr r3, r29
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 4(r30)
-; SOFT-NEXT: mr r3, r28
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 2(r30)
-; SOFT-NEXT: mr r3, r27
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 0(r30)
+; SOFT-NEXT: sth r28, 4(r30)
+; SOFT-NEXT: sth r27, 2(r30)
+; SOFT-NEXT: sth r26, 0(r30)
; SOFT-NEXT: addi r1, r1, 80
; SOFT-NEXT: ld r0, 16(r1)
; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
@@ -2021,56 +1868,29 @@ define void @test_trunc64_vec4(<4 x double> %a, ptr %p) nounwind {
; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; SOFT-NEXT: stdu r1, -80(r1)
-; SOFT-NEXT: mr r27, r3
-; SOFT-NEXT: mr r3, r6
; SOFT-NEXT: std r0, 96(r1)
; SOFT-NEXT: mr r30, r7
-; SOFT-NEXT: mr r29, r5
-; SOFT-NEXT: mr r28, r4
+; SOFT-NEXT: mr r29, r6
+; SOFT-NEXT: mr r28, r5
+; SOFT-NEXT: mr r27, r4
; SOFT-NEXT: bl __truncdfhf2
; SOFT-NEXT: nop
; SOFT-NEXT: mr r26, r3
-; SOFT-NEXT: mr r3, r29
+; SOFT-NEXT: mr r3, r27
; SOFT-NEXT: bl __truncdfhf2
; SOFT-NEXT: nop
-; SOFT-NEXT: mr r29, r3
+; SOFT-NEXT: mr r27, r3
; SOFT-NEXT: mr r3, r28
; SOFT-NEXT: bl __truncdfhf2
; SOFT-NEXT: nop
; SOFT-NEXT: mr r28, r3
-; SOFT-NEXT: mr r3, r27
+; SOFT-NEXT: mr r3, r29
; SOFT-NEXT: bl __truncdfhf2
; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r27, r3
-; SOFT-NEXT: clrldi r3, r28, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r28, r3
-; SOFT-NEXT: clrldi r3, r29, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r29, r3
-; SOFT-NEXT: clrldi r3, r26, 48
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
; SOFT-NEXT: sth r3, 6(r30)
-; SOFT-NEXT: mr r3, r29
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 4(r30)
-; SOFT-NEXT: mr r3, r28
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 2(r30)
-; SOFT-NEXT: mr r3, r27
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: sth r3, 0(r30)
+; SOFT-NEXT: sth r28, 4(r30)
+; SOFT-NEXT: sth r27, 2(r30)
+; SOFT-NEXT: sth r26, 0(r30)
; SOFT-NEXT: addi r1, r1, 80
; SOFT-NEXT: ld r0, 16(r1)
; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
@@ -2136,25 +1956,28 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind {
; PPC32-NEXT: mflr r0
; PPC32-NEXT: stwu r1, -32(r1)
; PPC32-NEXT: stw r0, 36(r1)
+; PPC32-NEXT: lis r5, 17200
+; PPC32-NEXT: xoris r3, r3, 32768
; PPC32-NEXT: stw r30, 16(r1) # 4-byte Folded Spill
-; PPC32-NEXT: mr r30, r3
-; PPC32-NEXT: lhz r3, 0(r4)
-; PPC32-NEXT: stfd f31, 24(r1) # 8-byte Folded Spill
-; PPC32-NEXT: bl __extendhfsf2
-; PPC32-NEXT: lis r3, 17200
-; PPC32-NEXT: stw r3, 8(r1)
-; PPC32-NEXT: xoris r3, r30, 32768
+; PPC32-NEXT: lhz r30, 0(r4)
+; PPC32-NEXT: stw r5, 8(r1)
; PPC32-NEXT: stw r3, 12(r1)
; PPC32-NEXT: lis r3, .LCPI23_0 at ha
-; PPC32-NEXT: fmr f31, f1
; PPC32-NEXT: lfd f0, 8(r1)
; PPC32-NEXT: lfs f1, .LCPI23_0 at l(r3)
+; PPC32-NEXT: stfd f31, 24(r1) # 8-byte Folded Spill
; PPC32-NEXT: fsub f0, f0, f1
; PPC32-NEXT: frsp f1, f0
; PPC32-NEXT: bl __truncsfhf2
; PPC32-NEXT: clrlwi r3, r3, 16
; PPC32-NEXT: bl __extendhfsf2
-; PPC32-NEXT: fadds f1, f31, f1
+; PPC32-NEXT: mr r3, r30
+; PPC32-NEXT: fmr f31, f1
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: fadds f1, f1, f31
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: clrlwi r3, r3, 16
+; PPC32-NEXT: bl __extendhfsf2
; PPC32-NEXT: lfd f31, 24(r1) # 8-byte Folded Reload
; PPC32-NEXT: lwz r30, 16(r1) # 4-byte Folded Reload
; PPC32-NEXT: lwz r0, 36(r1)
@@ -2168,20 +1991,25 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind {
; P8-NEXT: std r30, -24(r1) # 8-byte Folded Spill
; P8-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; P8-NEXT: stdu r1, -64(r1)
+; P8-NEXT: mtfprwa f0, r3
; P8-NEXT: std r0, 80(r1)
-; P8-NEXT: mr r30, r3
-; P8-NEXT: lhz r3, 0(r4)
+; P8-NEXT: lhz r30, 0(r4)
+; P8-NEXT: xscvsxdsp f1, f0
+; P8-NEXT: bl __truncsfhf2
+; P8-NEXT: nop
+; P8-NEXT: clrldi r3, r3, 48
; P8-NEXT: bl __extendhfsf2
; P8-NEXT: nop
-; P8-NEXT: mtfprwa f0, r30
+; P8-NEXT: mr r3, r30
; P8-NEXT: fmr f31, f1
-; P8-NEXT: xscvsxdsp f1, f0
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: xsaddsp f1, f1, f31
; P8-NEXT: bl __truncsfhf2
; P8-NEXT: nop
; P8-NEXT: clrldi r3, r3, 48
; P8-NEXT: bl __extendhfsf2
; P8-NEXT: nop
-; P8-NEXT: xsaddsp f1, f31, f1
; P8-NEXT: addi r1, r1, 64
; P8-NEXT: ld r0, 16(r1)
; P8-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
@@ -2191,17 +2019,22 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind {
;
; CHECK-LABEL: test_sitofp_fadd_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: mtfprwa f1, r3
+; CHECK-NEXT: mtfprwa f0, r3
; CHECK-NEXT: lhz r4, 0(r4)
-; CHECK-NEXT: xscvsxdsp f1, f1
+; CHECK-NEXT: xscvsxdsp f0, f0
+; CHECK-NEXT: xscvdphp f0, f0
+; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: mtfprwz f0, r4
-; CHECK-NEXT: xscvhpdp f0, f0
-; CHECK-NEXT: xscvdphp f1, f1
-; CHECK-NEXT: mffprwz r3, f1
; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: xscvhpdp f0, f0
; CHECK-NEXT: mtfprwz f1, r3
; CHECK-NEXT: xscvhpdp f1, f1
-; CHECK-NEXT: xsaddsp f1, f0, f1
+; CHECK-NEXT: xsaddsp f0, f0, f1
+; CHECK-NEXT: xscvdphp f0, f0
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: blr
;
; SOFT-LABEL: test_sitofp_fadd_i32:
@@ -2210,25 +2043,31 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind {
; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; SOFT-NEXT: stdu r1, -64(r1)
+; SOFT-NEXT: extsw r3, r3
; SOFT-NEXT: std r0, 80(r1)
-; SOFT-NEXT: mr r30, r3
-; SOFT-NEXT: lhz r3, 0(r4)
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: mr r29, r3
-; SOFT-NEXT: extsw r3, r30
+; SOFT-NEXT: mr r30, r4
; SOFT-NEXT: bl __floatsisf
; SOFT-NEXT: nop
; SOFT-NEXT: clrldi r3, r3, 32
; SOFT-NEXT: bl __truncsfhf2
; SOFT-NEXT: nop
-; SOFT-NEXT: clrldi r3, r3, 48
+; SOFT-NEXT: mr r29, r3
+; SOFT-NEXT: lhz r3, 0(r30)
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: mr r30, r3
+; SOFT-NEXT: clrldi r3, r29, 48
; SOFT-NEXT: bl __extendhfsf2
; SOFT-NEXT: nop
; SOFT-NEXT: mr r4, r3
-; SOFT-NEXT: mr r3, r29
+; SOFT-NEXT: mr r3, r30
; SOFT-NEXT: bl __addsf3
; SOFT-NEXT: nop
+; SOFT-NEXT: bl __truncsfhf2
+; SOFT-NEXT: nop
+; SOFT-NEXT: clrldi r3, r3, 48
+; SOFT-NEXT: bl __extendhfsf2
+; SOFT-NEXT: nop
; SOFT-NEXT: addi r1, r1, 64
; SOFT-NEXT: ld r0, 16(r1)
; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
@@ -2241,24 +2080,29 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind {
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -144(r1)
; BE-NEXT: std r0, 160(r1)
+; BE-NEXT: extsw r3, r3
; BE-NEXT: std r30, 120(r1) # 8-byte Folded Spill
-; BE-NEXT: mr r30, r3
-; BE-NEXT: lhz r3, 0(r4)
-; BE-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: extsw r3, r30
-; BE-NEXT: fmr f31, f1
+; BE-NEXT: lhz r30, 0(r4)
; BE-NEXT: std r3, 112(r1)
; BE-NEXT: lfd f0, 112(r1)
; BE-NEXT: fcfid f0, f0
+; BE-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill
; BE-NEXT: frsp f1, f0
; BE-NEXT: bl __truncsfhf2
; BE-NEXT: nop
; BE-NEXT: clrldi r3, r3, 48
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: fadds f1, f31, f1
+; BE-NEXT: mr r3, r30
+; BE-NEXT: fmr f31, f1
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: fadds f1, f1, f31
+; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: nop
+; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: lfd f31, 136(r1) # 8-byte Folded Reload
; BE-NEXT: ld r30, 120(r1) # 8-byte Folded Reload
; BE-NEXT: addi r1, r1, 144
@@ -2276,21 +2120,17 @@ define half @PR40273(half) nounwind {
; PPC32: # %bb.0:
; PPC32-NEXT: mflr r0
; PPC32-NEXT: stwu r1, -16(r1)
-; PPC32-NEXT: stw r0, 20(r1)
-; PPC32-NEXT: bl __truncsfhf2
; PPC32-NEXT: clrlwi r3, r3, 16
+; PPC32-NEXT: stw r0, 20(r1)
; PPC32-NEXT: bl __extendhfsf2
; PPC32-NEXT: lis r3, .LCPI24_0 at ha
; PPC32-NEXT: lfs f0, .LCPI24_0 at l(r3)
-; PPC32-NEXT: li r3, 0
+; PPC32-NEXT: li r3, 15360
; PPC32-NEXT: fcmpu cr0, f1, f0
-; PPC32-NEXT: bc 12, eq, .LBB24_2
+; PPC32-NEXT: bne cr0, .LBB24_2
; PPC32-NEXT: # %bb.1:
-; PPC32-NEXT: li r3, 4
+; PPC32-NEXT: li r3, 0
; PPC32-NEXT: .LBB24_2:
-; PPC32-NEXT: li r4, .LCPI24_1 at l
-; PPC32-NEXT: addis r4, r4, .LCPI24_1 at ha
-; PPC32-NEXT: lfsx f1, r4, r3
; PPC32-NEXT: lwz r0, 20(r1)
; PPC32-NEXT: addi r1, r1, 16
; PPC32-NEXT: mtlr r0
@@ -2300,20 +2140,14 @@ define half @PR40273(half) nounwind {
; P8: # %bb.0:
; P8-NEXT: mflr r0
; P8-NEXT: stdu r1, -32(r1)
-; P8-NEXT: std r0, 48(r1)
-; P8-NEXT: bl __truncsfhf2
-; P8-NEXT: nop
; P8-NEXT: clrldi r3, r3, 48
+; P8-NEXT: std r0, 48(r1)
; P8-NEXT: bl __extendhfsf2
; P8-NEXT: nop
-; P8-NEXT: fmr f0, f1
-; P8-NEXT: xxlxor f1, f1, f1
-; P8-NEXT: fcmpu cr0, f0, f1
-; P8-NEXT: beq cr0, .LBB24_2
-; P8-NEXT: # %bb.1:
-; P8-NEXT: vspltisw v2, 1
-; P8-NEXT: xvcvsxwdp vs1, vs34
-; P8-NEXT: .LBB24_2:
+; P8-NEXT: xxlxor f0, f0, f0
+; P8-NEXT: li r3, 15360
+; P8-NEXT: fcmpu cr0, f1, f0
+; P8-NEXT: iseleq r3, 0, r3
; P8-NEXT: addi r1, r1, 32
; P8-NEXT: ld r0, 16(r1)
; P8-NEXT: mtlr r0
@@ -2321,17 +2155,13 @@ define half @PR40273(half) nounwind {
;
; CHECK-LABEL: PR40273:
; CHECK: # %bb.0:
-; CHECK-NEXT: xscvdphp f0, f1
-; CHECK-NEXT: xxlxor f1, f1, f1
-; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: xxlxor f1, f1, f1
; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: li r3, 15360
; CHECK-NEXT: xscvhpdp f0, f0
; CHECK-NEXT: fcmpu cr0, f0, f1
-; CHECK-NEXT: beqlr cr0
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: vspltisw v2, 1
-; CHECK-NEXT: xvcvsxwdp vs1, vs34
+; CHECK-NEXT: iseleq r3, 0, r3
; CHECK-NEXT: blr
;
; SOFT-LABEL: PR40273:
@@ -2346,10 +2176,8 @@ define half @PR40273(half) nounwind {
; SOFT-NEXT: bl __nesf2
; SOFT-NEXT: nop
; SOFT-NEXT: cmplwi r3, 0
-; SOFT-NEXT: lis r3, 16256
+; SOFT-NEXT: li r3, 15360
; SOFT-NEXT: iseleq r3, 0, r3
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
; SOFT-NEXT: addi r1, r1, 32
; SOFT-NEXT: ld r0, 16(r1)
; SOFT-NEXT: mtlr r0
@@ -2359,23 +2187,18 @@ define half @PR40273(half) nounwind {
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -112(r1)
-; BE-NEXT: std r0, 128(r1)
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: std r0, 128(r1)
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
; BE-NEXT: addis r3, r2, .LCPI24_0 at toc@ha
; BE-NEXT: lfs f0, .LCPI24_0 at toc@l(r3)
-; BE-NEXT: li r3, 0
+; BE-NEXT: li r3, 15360
; BE-NEXT: fcmpu cr0, f1, f0
-; BE-NEXT: bc 12, eq, .LBB24_2
+; BE-NEXT: bne cr0, .LBB24_2
; BE-NEXT: # %bb.1:
-; BE-NEXT: li r3, 4
+; BE-NEXT: li r3, 0
; BE-NEXT: .LBB24_2:
-; BE-NEXT: addis r4, r2, .LCPI24_1 at toc@ha
-; BE-NEXT: addi r4, r4, .LCPI24_1 at toc@l
-; BE-NEXT: lfsx f1, r4, r3
; BE-NEXT: addi r1, r1, 112
; BE-NEXT: ld r0, 16(r1)
; BE-NEXT: mtlr r0
@@ -2390,42 +2213,17 @@ define half @PR40273(half) nounwind {
define half @fabs(half %x) nounwind {
; PPC32-LABEL: fabs:
; PPC32: # %bb.0:
-; PPC32-NEXT: mflr r0
-; PPC32-NEXT: stwu r1, -16(r1)
-; PPC32-NEXT: stw r0, 20(r1)
-; PPC32-NEXT: bl __truncsfhf2
-; PPC32-NEXT: clrlwi r3, r3, 16
-; PPC32-NEXT: bl __extendhfsf2
-; PPC32-NEXT: fabs f1, f1
-; PPC32-NEXT: lwz r0, 20(r1)
-; PPC32-NEXT: addi r1, r1, 16
-; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: clrlwi r3, r3, 17
; PPC32-NEXT: blr
;
; P8-LABEL: fabs:
; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: stdu r1, -32(r1)
-; P8-NEXT: std r0, 48(r1)
-; P8-NEXT: bl __truncsfhf2
-; P8-NEXT: nop
-; P8-NEXT: clrldi r3, r3, 48
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: xsabsdp f1, f1
-; P8-NEXT: addi r1, r1, 32
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: mtlr r0
+; P8-NEXT: clrldi r3, r3, 49
; P8-NEXT: blr
;
; CHECK-LABEL: fabs:
; CHECK: # %bb.0:
-; CHECK-NEXT: xscvdphp f0, f1
-; CHECK-NEXT: mffprwz r3, f0
-; CHECK-NEXT: clrlwi r3, r3, 16
-; CHECK-NEXT: mtfprwz f0, r3
-; CHECK-NEXT: xscvhpdp f0, f0
-; CHECK-NEXT: xsabsdp f1, f0
+; CHECK-NEXT: clrldi r3, r3, 49
; CHECK-NEXT: blr
;
; SOFT-LABEL: fabs:
@@ -2435,18 +2233,7 @@ define half @fabs(half %x) nounwind {
;
; BE-LABEL: fabs:
; BE: # %bb.0:
-; BE-NEXT: mflr r0
-; BE-NEXT: stdu r1, -112(r1)
-; BE-NEXT: std r0, 128(r1)
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: fabs f1, f1
-; BE-NEXT: addi r1, r1, 112
-; BE-NEXT: ld r0, 16(r1)
-; BE-NEXT: mtlr r0
+; BE-NEXT: clrldi r3, r3, 49
; BE-NEXT: blr
%a = call half @llvm.fabs.f16(half %x)
ret half %a
@@ -2455,107 +2242,35 @@ define half @fabs(half %x) nounwind {
define half @fcopysign(half %x, half %y) nounwind {
; PPC32-LABEL: fcopysign:
; PPC32: # %bb.0:
-; PPC32-NEXT: mflr r0
-; PPC32-NEXT: stwu r1, -32(r1)
-; PPC32-NEXT: stw r0, 36(r1)
-; PPC32-NEXT: stfd f31, 24(r1) # 8-byte Folded Spill
-; PPC32-NEXT: fmr f31, f2
-; PPC32-NEXT: bl __truncsfhf2
-; PPC32-NEXT: clrlwi r3, r3, 16
-; PPC32-NEXT: bl __extendhfsf2
-; PPC32-NEXT: stfs f31, 20(r1)
-; PPC32-NEXT: lwz r3, 20(r1)
-; PPC32-NEXT: srwi r3, r3, 31
-; PPC32-NEXT: andi. r3, r3, 1
-; PPC32-NEXT: bc 12, gt, .LBB26_2
-; PPC32-NEXT: # %bb.1:
-; PPC32-NEXT: fabs f1, f1
-; PPC32-NEXT: b .LBB26_3
-; PPC32-NEXT: .LBB26_2:
-; PPC32-NEXT: fnabs f1, f1
-; PPC32-NEXT: .LBB26_3:
-; PPC32-NEXT: lfd f31, 24(r1) # 8-byte Folded Reload
-; PPC32-NEXT: lwz r0, 36(r1)
-; PPC32-NEXT: addi r1, r1, 32
-; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: rlwimi r3, r4, 0, 0, 16
; PPC32-NEXT: blr
;
; P8-LABEL: fcopysign:
; P8: # %bb.0:
-; P8-NEXT: mflr r0
-; P8-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
-; P8-NEXT: stdu r1, -48(r1)
-; P8-NEXT: std r0, 64(r1)
-; P8-NEXT: fmr f31, f2
-; P8-NEXT: bl __truncsfhf2
-; P8-NEXT: nop
-; P8-NEXT: clrldi r3, r3, 48
-; P8-NEXT: bl __extendhfsf2
-; P8-NEXT: nop
-; P8-NEXT: fcpsgn f1, f31, f1
-; P8-NEXT: addi r1, r1, 48
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
-; P8-NEXT: mtlr r0
+; P8-NEXT: rotldi r4, r4, 49
+; P8-NEXT: clrldi r3, r3, 49
+; P8-NEXT: rldimi r3, r4, 15, 32
; P8-NEXT: blr
;
; CHECK-LABEL: fcopysign:
; CHECK: # %bb.0:
-; CHECK-NEXT: xscvdphp f0, f1
-; CHECK-NEXT: mffprwz r3, f0
-; CHECK-NEXT: clrlwi r3, r3, 16
-; CHECK-NEXT: mtfprwz f0, r3
-; CHECK-NEXT: xscvhpdp f0, f0
-; CHECK-NEXT: fcpsgn f1, f2, f0
+; CHECK-NEXT: rotldi r4, r4, 49
+; CHECK-NEXT: clrldi r3, r3, 49
+; CHECK-NEXT: rldimi r3, r4, 15, 32
; CHECK-NEXT: blr
;
; SOFT-LABEL: fcopysign:
; SOFT: # %bb.0:
-; SOFT-NEXT: mflr r0
-; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT: stdu r1, -48(r1)
-; SOFT-NEXT: clrldi r3, r3, 48
-; SOFT-NEXT: std r0, 64(r1)
-; SOFT-NEXT: mr r30, r4
-; SOFT-NEXT: bl __extendhfsf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: rlwimi r3, r30, 16, 0, 0
-; SOFT-NEXT: clrldi r3, r3, 32
-; SOFT-NEXT: bl __truncsfhf2
-; SOFT-NEXT: nop
-; SOFT-NEXT: addi r1, r1, 48
-; SOFT-NEXT: ld r0, 16(r1)
-; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT: mtlr r0
+; SOFT-NEXT: rotldi r4, r4, 49
+; SOFT-NEXT: clrldi r3, r3, 49
+; SOFT-NEXT: rldimi r3, r4, 15, 32
; SOFT-NEXT: blr
;
; BE-LABEL: fcopysign:
; BE: # %bb.0:
-; BE-NEXT: mflr r0
-; BE-NEXT: stdu r1, -128(r1)
-; BE-NEXT: std r0, 144(r1)
-; BE-NEXT: stfd f31, 120(r1) # 8-byte Folded Spill
-; BE-NEXT: fmr f31, f2
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: stfs f31, 116(r1)
-; BE-NEXT: lwz r3, 116(r1)
-; BE-NEXT: srwi r3, r3, 31
-; BE-NEXT: andi. r3, r3, 1
-; BE-NEXT: bc 12, gt, .LBB26_2
-; BE-NEXT: # %bb.1:
-; BE-NEXT: fabs f1, f1
-; BE-NEXT: b .LBB26_3
-; BE-NEXT: .LBB26_2:
-; BE-NEXT: fnabs f1, f1
-; BE-NEXT: .LBB26_3:
-; BE-NEXT: lfd f31, 120(r1) # 8-byte Folded Reload
-; BE-NEXT: addi r1, r1, 128
-; BE-NEXT: ld r0, 16(r1)
-; BE-NEXT: mtlr r0
+; BE-NEXT: rotldi r4, r4, 49
+; BE-NEXT: clrldi r3, r3, 49
+; BE-NEXT: rldimi r3, r4, 15, 32
; BE-NEXT: blr
%a = call half @llvm.copysign.f16(half %x, half %y)
ret half %a
diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll b/llvm/test/CodeGen/PowerPC/ldexp.ll
index 8d7253b5ce8e3..3c8439683cba5 100644
--- a/llvm/test/CodeGen/PowerPC/ldexp.ll
+++ b/llvm/test/CodeGen/PowerPC/ldexp.ll
@@ -143,15 +143,15 @@ define half @ldexp_f16(half %arg0, i32 %arg1) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -32(r1)
+; CHECK-NEXT: clrlwi r3, r3, 16
; CHECK-NEXT: std r0, 48(r1)
-; CHECK-NEXT: xscvdphp f0, f1
; CHECK-NEXT: extsw r4, r4
-; CHECK-NEXT: mffprwz r3, f0
-; CHECK-NEXT: clrlwi r3, r3, 16
; CHECK-NEXT: mtfprwz f0, r3
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl ldexpf
; CHECK-NEXT: nop
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
diff --git a/llvm/test/CodeGen/PowerPC/llvm.frexp.ll b/llvm/test/CodeGen/PowerPC/llvm.frexp.ll
index 95d763d7179ed..b0f9fd47a1e54 100644
--- a/llvm/test/CodeGen/PowerPC/llvm.frexp.ll
+++ b/llvm/test/CodeGen/PowerPC/llvm.frexp.ll
@@ -7,16 +7,16 @@ define { half, i32 } @test_frexp_f16_i32(half %a) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: clrlwi r3, r3, 16
; CHECK-NEXT: std r0, 64(r1)
-; CHECK-NEXT: xscvdphp f0, f1
; CHECK-NEXT: addi r4, r1, 44
-; CHECK-NEXT: mffprwz r3, f0
-; CHECK-NEXT: clrlwi r3, r3, 16
; CHECK-NEXT: mtfprwz f0, r3
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: nop
-; CHECK-NEXT: lwz r3, 44(r1)
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: lwz r4, 44(r1)
+; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -30,15 +30,15 @@ define half @test_frexp_f16_i32_only_use_fract(half %a) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: clrlwi r3, r3, 16
; CHECK-NEXT: std r0, 64(r1)
-; CHECK-NEXT: xscvdphp f0, f1
; CHECK-NEXT: addi r4, r1, 44
-; CHECK-NEXT: mffprwz r3, f0
-; CHECK-NEXT: clrlwi r3, r3, 16
; CHECK-NEXT: mtfprwz f0, r3
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: nop
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -53,11 +53,9 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: clrlwi r3, r3, 16
; CHECK-NEXT: std r0, 64(r1)
-; CHECK-NEXT: xscvdphp f0, f1
; CHECK-NEXT: addi r4, r1, 44
-; CHECK-NEXT: mffprwz r3, f0
-; CHECK-NEXT: clrlwi r3, r3, 16
; CHECK-NEXT: mtfprwz f0, r3
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl frexpf
@@ -76,43 +74,42 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) nounwind
; CHECK-LABEL: test_frexp_v2f16_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
-; CHECK-NEXT: std r29, -40(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r30, -32(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: stdu r1, -80(r1)
+; CHECK-NEXT: clrlwi r29, r3, 16
+; CHECK-NEXT: clrlwi r3, r4, 16
+; CHECK-NEXT: addi r30, r1, 44
+; CHECK-NEXT: mtfprwz f0, r3
; CHECK-NEXT: std r0, 96(r1)
-; CHECK-NEXT: xscvdphp f0, f2
-; CHECK-NEXT: addi r30, r1, 32
; CHECK-NEXT: mr r4, r30
-; CHECK-NEXT: mffprwz r3, f0
-; CHECK-NEXT: clrlwi r3, r3, 16
-; CHECK-NEXT: mtfprwz f0, r3
-; CHECK-NEXT: xscvhpdp f31, f0
-; CHECK-NEXT: xscvdphp f0, f1
-; CHECK-NEXT: mffprwz r3, f0
-; CHECK-NEXT: clrlwi r3, r3, 16
-; CHECK-NEXT: mtfprwz f0, r3
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: nop
-; CHECK-NEXT: addi r29, r1, 36
-; CHECK-NEXT: fmr f30, f1
-; CHECK-NEXT: fmr f1, f31
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: mtfprwz f0, r29
+; CHECK-NEXT: addi r29, r1, 40
+; CHECK-NEXT: sth r3, 50(r1)
+; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: mr r4, r29
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f2, f1
-; CHECK-NEXT: lfiwzx f0, 0, r30
-; CHECK-NEXT: lfiwzx f1, 0, r29
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: li r4, 2
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: sth r3, 48(r1)
+; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: lxv v3, 48(r1)
+; CHECK-NEXT: lfiwzx f0, 0, r29
+; CHECK-NEXT: lfiwzx f1, 0, r30
; CHECK-NEXT: xxmrghw v2, vs1, vs0
-; CHECK-NEXT: fmr f1, f30
+; CHECK-NEXT: vextuhrx r3, r3, v3
+; CHECK-NEXT: vextuhrx r4, r4, v3
; CHECK-NEXT: addi r1, r1, 80
; CHECK-NEXT: ld r0, 16(r1)
-; CHECK-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r30, -32(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r29, -40(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
%result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a)
@@ -123,34 +120,35 @@ define <2 x half> @test_frexp_v2f16_v2i32_only_use_fract(<2 x half> %a) nounwind
; CHECK-LABEL: test_frexp_v2f16_v2i32_only_use_fract:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stdu r1, -64(r1)
-; CHECK-NEXT: std r0, 80(r1)
-; CHECK-NEXT: xscvdphp f0, f2
-; CHECK-NEXT: addi r4, r1, 40
-; CHECK-NEXT: mffprwz r3, f0
-; CHECK-NEXT: clrlwi r3, r3, 16
-; CHECK-NEXT: mtfprwz f0, r3
-; CHECK-NEXT: xscvhpdp f31, f0
-; CHECK-NEXT: xscvdphp f0, f1
-; CHECK-NEXT: mffprwz r3, f0
-; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stdu r1, -80(r1)
+; CHECK-NEXT: clrlwi r30, r3, 16
+; CHECK-NEXT: clrlwi r3, r4, 16
+; CHECK-NEXT: addi r4, r1, 44
; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: std r0, 96(r1)
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: nop
-; CHECK-NEXT: addi r4, r1, 44
-; CHECK-NEXT: fmr f30, f1
-; CHECK-NEXT: fmr f1, f31
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: addi r4, r1, 40
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: mtfprwz f0, r30
+; CHECK-NEXT: sth r3, 50(r1)
+; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f2, f1
-; CHECK-NEXT: fmr f1, f30
-; CHECK-NEXT: addi r1, r1, 64
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: li r4, 2
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: sth r3, 48(r1)
+; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: lxv v2, 48(r1)
+; CHECK-NEXT: vextuhrx r3, r3, v2
+; CHECK-NEXT: vextuhrx r4, r4, v2
+; CHECK-NEXT: addi r1, r1, 80
; CHECK-NEXT: ld r0, 16(r1)
-; CHECK-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
%result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a)
@@ -162,38 +160,31 @@ define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) nounwind {
; CHECK-LABEL: test_frexp_v2f16_v2i32_only_use_exp:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
-; CHECK-NEXT: std r29, -32(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r30, -24(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stdu r1, -80(r1)
-; CHECK-NEXT: std r0, 96(r1)
-; CHECK-NEXT: xscvdphp f0, f2
-; CHECK-NEXT: addi r30, r1, 40
-; CHECK-NEXT: mr r4, r30
-; CHECK-NEXT: mffprwz r3, f0
-; CHECK-NEXT: clrlwi r3, r3, 16
-; CHECK-NEXT: mtfprwz f0, r3
-; CHECK-NEXT: xscvhpdp f31, f0
-; CHECK-NEXT: xscvdphp f0, f1
-; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stdu r1, -64(r1)
; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: std r0, 80(r1)
+; CHECK-NEXT: addi r30, r1, 32
; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: clrlwi r29, r4, 16
+; CHECK-NEXT: mr r4, r30
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: nop
-; CHECK-NEXT: addi r29, r1, 44
-; CHECK-NEXT: fmr f1, f31
+; CHECK-NEXT: mtfprwz f0, r29
+; CHECK-NEXT: addi r29, r1, 36
+; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: mr r4, r29
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: nop
; CHECK-NEXT: lfiwzx f0, 0, r30
; CHECK-NEXT: lfiwzx f1, 0, r29
; CHECK-NEXT: xxmrghw v2, vs1, vs0
-; CHECK-NEXT: addi r1, r1, 80
+; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: ld r0, 16(r1)
-; CHECK-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r30, -24(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r29, -32(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
%result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a)
diff --git a/llvm/test/CodeGen/PowerPC/llvm.modf.ll b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
index 1b137c786cc91..fa9082278826c 100644
--- a/llvm/test/CodeGen/PowerPC/llvm.modf.ll
+++ b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
@@ -10,15 +10,17 @@ define { half, half } @test_modf_f16(half %a) {
; CHECK-NEXT: std r0, 64(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: xscvdphp f0, f1
-; CHECK-NEXT: addi r4, r1, 44
-; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: addi r4, r1, 44
; CHECK-NEXT: mtfprwz f0, r3
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl modff
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f2, 44(r1)
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: lfs f0, 44(r1)
+; CHECK-NEXT: xscvdphp f0, f0
+; CHECK-NEXT: mffprwz r4, f0
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -35,14 +37,14 @@ define half @test_modf_f16_only_use_fractional_part(half %a) {
; CHECK-NEXT: std r0, 64(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: xscvdphp f0, f1
-; CHECK-NEXT: addi r4, r1, 44
-; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: addi r4, r1, 44
; CHECK-NEXT: mtfprwz f0, r3
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl modff
; CHECK-NEXT: nop
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -60,15 +62,15 @@ define half @test_modf_f16_only_use_integral_part(half %a) {
; CHECK-NEXT: std r0, 64(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: xscvdphp f0, f1
-; CHECK-NEXT: addi r4, r1, 44
-; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: addi r4, r1, 44
; CHECK-NEXT: mtfprwz f0, r3
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl modff
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 44(r1)
+; CHECK-NEXT: lfs f0, 44(r1)
+; CHECK-NEXT: xscvdphp f0, f0
+; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -82,40 +84,53 @@ define { <2 x half>, <2 x half> } @test_modf_v2f16(<2 x half> %a) {
; CHECK-LABEL: test_modf_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
-; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_def_cfa_offset 96
; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset f30, -16
+; CHECK-NEXT: .cfi_offset r30, -24
; CHECK-NEXT: .cfi_offset f31, -8
-; CHECK-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r30, -24(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stdu r1, -64(r1)
-; CHECK-NEXT: std r0, 80(r1)
-; CHECK-NEXT: xscvdphp f0, f2
-; CHECK-NEXT: addi r4, r1, 40
-; CHECK-NEXT: mffprwz r3, f0
-; CHECK-NEXT: clrlwi r3, r3, 16
-; CHECK-NEXT: mtfprwz f0, r3
-; CHECK-NEXT: xscvhpdp f31, f0
-; CHECK-NEXT: xscvdphp f0, f1
-; CHECK-NEXT: mffprwz r3, f0
-; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: stdu r1, -96(r1)
+; CHECK-NEXT: clrlwi r30, r3, 16
+; CHECK-NEXT: clrlwi r3, r4, 16
+; CHECK-NEXT: addi r4, r1, 44
; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: std r0, 112(r1)
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl modff
; CHECK-NEXT: nop
-; CHECK-NEXT: addi r4, r1, 44
-; CHECK-NEXT: fmr f30, f1
-; CHECK-NEXT: fmr f1, f31
+; CHECK-NEXT: lfs f0, 44(r1)
+; CHECK-NEXT: addi r4, r1, 40
+; CHECK-NEXT: xscvdphp f0, f0
+; CHECK-NEXT: fmr f31, f1
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: mtfprwz f0, r30
+; CHECK-NEXT: sth r3, 50(r1)
+; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl modff
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f3, 40(r1)
-; CHECK-NEXT: fmr f2, f1
-; CHECK-NEXT: fmr f1, f30
-; CHECK-NEXT: lfs f4, 44(r1)
-; CHECK-NEXT: addi r1, r1, 64
+; CHECK-NEXT: lfs f0, 40(r1)
+; CHECK-NEXT: li r5, 0
+; CHECK-NEXT: li r6, 2
+; CHECK-NEXT: xscvdphp f0, f0
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: sth r3, 48(r1)
+; CHECK-NEXT: xscvdphp f0, f31
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: sth r3, 66(r1)
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: lxv v2, 48(r1)
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: sth r3, 64(r1)
+; CHECK-NEXT: lxv v3, 64(r1)
+; CHECK-NEXT: vextuhrx r3, r5, v3
+; CHECK-NEXT: vextuhrx r4, r6, v3
+; CHECK-NEXT: vextuhrx r5, r5, v2
+; CHECK-NEXT: vextuhrx r6, r6, v2
+; CHECK-NEXT: addi r1, r1, 96
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r30, -24(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
%result = call { <2 x half>, <2 x half> } @llvm.modf.v2f16(<2 x half> %a)
diff --git a/llvm/test/CodeGen/PowerPC/pr48519.ll b/llvm/test/CodeGen/PowerPC/pr48519.ll
index b610f12159ee2..61a8ebe49e6dd 100644
--- a/llvm/test/CodeGen/PowerPC/pr48519.ll
+++ b/llvm/test/CodeGen/PowerPC/pr48519.ll
@@ -12,26 +12,21 @@ define void @julia__typed_vcat_20() #0 {
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: li r30, 0
-; CHECK-NEXT: li r3, 1
+; CHECK-NEXT: li r4, 1
; CHECK-NEXT: std r0, 64(r1)
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: # %bb3
; CHECK-NEXT: #
-; CHECK-NEXT: addi r3, r3, -1
+; CHECK-NEXT: addi r3, r4, -1
; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: xscvsxdsp f1, f0
; CHECK-NEXT: bl __truncsfhf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r3, 48
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
; CHECK-NEXT: addi r30, r30, -1
-; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: li r4, 0
; CHECK-NEXT: cmpldi r30, 0
; CHECK-NEXT: bc 12, gt, .LBB0_1
; CHECK-NEXT: # %bb.2: # %bb11
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
; CHECK-NEXT: sth r3, 128(0)
;
; CHECK-P9-LABEL: julia__typed_vcat_20:
@@ -39,23 +34,18 @@ define void @julia__typed_vcat_20() #0 {
; CHECK-P9-NEXT: li r3, 0
; CHECK-P9-NEXT: mtctr r3
; CHECK-P9-NEXT: li r3, 1
-; CHECK-P9-NEXT: .p2align 4
+; CHECK-P9-NEXT: .p2align 5
; CHECK-P9-NEXT: .LBB0_1: # %bb3
; CHECK-P9-NEXT: #
; CHECK-P9-NEXT: addi r3, r3, -1
; CHECK-P9-NEXT: mtfprd f0, r3
+; CHECK-P9-NEXT: li r3, 0
; CHECK-P9-NEXT: xscvsxdsp f0, f0
; CHECK-P9-NEXT: xscvdphp f0, f0
-; CHECK-P9-NEXT: mffprwz r3, f0
-; CHECK-P9-NEXT: clrlwi r3, r3, 16
-; CHECK-P9-NEXT: mtfprwz f0, r3
-; CHECK-P9-NEXT: li r3, 0
-; CHECK-P9-NEXT: xscvhpdp f0, f0
; CHECK-P9-NEXT: bdnz .LBB0_1
; CHECK-P9-NEXT: # %bb.2: # %bb11
-; CHECK-P9-NEXT: xscvdphp f0, f0
-; CHECK-P9-NEXT: li r3, 128
-; CHECK-P9-NEXT: stxsihx f0, 0, r3
+; CHECK-P9-NEXT: mffprwz r3, f0
+; CHECK-P9-NEXT: sth r3, 128(0)
bb:
%i = load i64, ptr addrspace(11) null, align 8
%i1 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %i, i64 0)
@@ -147,54 +137,33 @@ bb9: ; preds = %bb3, %bb1
define void @func_48786() #0 {
; CHECK-LABEL: func_48786:
; CHECK: # %bb.0: # %bb
-; CHECK-NEXT: mfocrf r12, 32
-; CHECK-NEXT: stw r12, 8(r1)
-; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stdu r1, -48(r1)
-; CHECK-NEXT: std r0, 64(r1)
-; CHECK-NEXT: std r30, 32(r1) # 8-byte Folded Spill
-; CHECK-NEXT: # implicit-def: $x30
; CHECK-NEXT: ld r3, 0(r3)
; CHECK-NEXT: cmpdi r3, 0
-; CHECK-NEXT: crnot 4*cr2+lt, eq
+; CHECK-NEXT: mtctr r3
+; CHECK-NEXT: crnot 4*cr5+lt, eq
; CHECK-NEXT: b .LBB2_2
-; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB2_1: # %bb10
; CHECK-NEXT: #
-; CHECK-NEXT: addi r30, r30, -1
-; CHECK-NEXT: cmpldi r30, 0
-; CHECK-NEXT: bc 4, gt, .LBB2_5
+; CHECK-NEXT: bdzlr
; CHECK-NEXT: .LBB2_2: # %bb2
; CHECK-NEXT: #
; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB2_1
; CHECK-NEXT: # %bb.3: # %bb4
; CHECK-NEXT: #
-; CHECK-NEXT: lhz r3, 0(r3)
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: bc 4, 4*cr2+lt, .LBB2_6
+; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB2_5
; CHECK-NEXT: # %bb.4: # %bb8
; CHECK-NEXT: #
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
+; CHECK-NEXT: lhz r3, 0(r3)
; CHECK-NEXT: sth r3, 0(0)
; CHECK-NEXT: b .LBB2_1
-; CHECK-NEXT: .LBB2_5: # %bb14
-; CHECK-NEXT: ld r30, 32(r1) # 8-byte Folded Reload
-; CHECK-NEXT: addi r1, r1, 48
-; CHECK-NEXT: ld r0, 16(r1)
-; CHECK-NEXT: lwz r12, 8(r1)
-; CHECK-NEXT: mtlr r0
-; CHECK-NEXT: mtocrf 32, r12
-; CHECK-NEXT: blr
-; CHECK-NEXT: .LBB2_6: # %bb15
+; CHECK-NEXT: .LBB2_5: # %bb15
;
; CHECK-P9-LABEL: func_48786:
; CHECK-P9: # %bb.0: # %bb
; CHECK-P9-NEXT: ld r3, 0(r3)
; CHECK-P9-NEXT: cmpdi r3, 0
; CHECK-P9-NEXT: mtctr r3
-; CHECK-P9-NEXT: li r3, 0
; CHECK-P9-NEXT: crnot 4*cr5+lt, eq
; CHECK-P9-NEXT: b .LBB2_2
; CHECK-P9-NEXT: .p2align 5
@@ -206,13 +175,11 @@ define void @func_48786() #0 {
; CHECK-P9-NEXT: bc 12, 4*cr5+lt, .LBB2_1
; CHECK-P9-NEXT: # %bb.3: # %bb4
; CHECK-P9-NEXT: #
-; CHECK-P9-NEXT: lxsihzx f0, 0, r3
-; CHECK-P9-NEXT: xscvhpdp f0, f0
; CHECK-P9-NEXT: bc 4, 4*cr5+lt, .LBB2_5
; CHECK-P9-NEXT: # %bb.4: # %bb8
; CHECK-P9-NEXT: #
-; CHECK-P9-NEXT: xscvdphp f0, f0
-; CHECK-P9-NEXT: stxsihx f0, 0, r3
+; CHECK-P9-NEXT: lhz r3, 0(r3)
+; CHECK-P9-NEXT: sth r3, 0(0)
; CHECK-P9-NEXT: b .LBB2_1
; CHECK-P9-NEXT: .LBB2_5: # %bb15
bb:
@@ -260,41 +227,29 @@ bb15: ; preds = %bb5
define void @func_48785(half %arg) #0 {
; CHECK-LABEL: func_48785:
; CHECK: # %bb.0: # %bb
-; CHECK-NEXT: mflr r0
-; CHECK-NEXT: std r29, -32(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r30, -24(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stdu r1, -64(r1)
-; CHECK-NEXT: fmr f31, f1
-; CHECK-NEXT: li r3, 1
-; CHECK-NEXT: li r29, 0
-; CHECK-NEXT: std r0, 80(r1)
-; CHECK-NEXT: rldic r30, r3, 62, 1
-; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: li r4, 1
+; CHECK-NEXT: rldic r4, r4, 62, 1
+; CHECK-NEXT: mtctr r4
+; CHECK-NEXT: li r4, 0
+; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB3_1: # %bb1
; CHECK-NEXT: #
-; CHECK-NEXT: fmr f1, f31
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: addi r30, r30, -1
-; CHECK-NEXT: sth r3, 0(r29)
-; CHECK-NEXT: addi r29, r29, 24
-; CHECK-NEXT: cmpldi r30, 0
-; CHECK-NEXT: bc 12, gt, .LBB3_1
+; CHECK-NEXT: sth r3, 0(r4)
+; CHECK-NEXT: addi r4, r4, 24
+; CHECK-NEXT: bdnz .LBB3_1
; CHECK-NEXT: # %bb.2: # %bb5
;
; CHECK-P9-LABEL: func_48785:
; CHECK-P9: # %bb.0: # %bb
-; CHECK-P9-NEXT: li r3, 1
-; CHECK-P9-NEXT: rldic r3, r3, 62, 1
-; CHECK-P9-NEXT: mtctr r3
-; CHECK-P9-NEXT: li r3, 0
+; CHECK-P9-NEXT: li r4, 1
+; CHECK-P9-NEXT: rldic r4, r4, 62, 1
+; CHECK-P9-NEXT: mtctr r4
+; CHECK-P9-NEXT: li r4, 0
; CHECK-P9-NEXT: .p2align 4
; CHECK-P9-NEXT: .LBB3_1: # %bb1
; CHECK-P9-NEXT: #
-; CHECK-P9-NEXT: xscvdphp f0, f1
-; CHECK-P9-NEXT: stxsihx f0, 0, r3
-; CHECK-P9-NEXT: addi r3, r3, 24
+; CHECK-P9-NEXT: sth r3, 0(r4)
+; CHECK-P9-NEXT: addi r4, r4, 24
; CHECK-P9-NEXT: bdnz .LBB3_1
; CHECK-P9-NEXT: # %bb.2: # %bb5
bb:
diff --git a/llvm/test/CodeGen/PowerPC/pr49092.ll b/llvm/test/CodeGen/PowerPC/pr49092.ll
index 7b524a6d2f69b..3c028e9005ee6 100644
--- a/llvm/test/CodeGen/PowerPC/pr49092.ll
+++ b/llvm/test/CodeGen/PowerPC/pr49092.ll
@@ -8,26 +8,14 @@
define dso_local half @test2(i64 %a, i64 %b) local_unnamed_addr #0 {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: add r3, r4, r3
-; CHECK-NEXT: std r0, 48(r1)
; CHECK-NEXT: addi r3, r3, 11
-; CHECK-NEXT: clrlwi r3, r3, 16
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: addi r1, r1, 32
-; CHECK-NEXT: ld r0, 16(r1)
-; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P9-LABEL: test2:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: add r3, r4, r3
; CHECK-P9-NEXT: addi r3, r3, 11
-; CHECK-P9-NEXT: clrlwi r3, r3, 16
-; CHECK-P9-NEXT: mtfprwz f0, r3
-; CHECK-P9-NEXT: xscvhpdp f1, f0
; CHECK-P9-NEXT: blr
entry:
%add = add i64 %b, %a
diff --git a/llvm/test/CodeGen/PowerPC/vector-llrint.ll b/llvm/test/CodeGen/PowerPC/vector-llrint.ll
index 8a9e48e002381..fef9c039c043e 100644
--- a/llvm/test/CodeGen/PowerPC/vector-llrint.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-llrint.ll
@@ -18,10 +18,8 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind {
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -112(r1)
-; BE-NEXT: std r0, 128(r1)
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: std r0, 128(r1)
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
; BE-NEXT: bl llrintf
@@ -35,10 +33,8 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -32(r1)
-; CHECK-NEXT: std r0, 48(r1)
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: std r0, 48(r1)
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
; CHECK-NEXT: bl llrintf
@@ -52,10 +48,8 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind {
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
; FAST-NEXT: stdu r1, -32(r1)
-; FAST-NEXT: std r0, 48(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: std r0, 48(r1)
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
; FAST-NEXT: fctid f0, f1
@@ -73,37 +67,26 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind {
; BE-LABEL: llrint_v1i64_v2f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
-; BE-NEXT: stdu r1, -160(r1)
-; BE-NEXT: std r0, 176(r1)
-; BE-NEXT: stfd f31, 152(r1) # 8-byte Folded Spill
-; BE-NEXT: fmr f31, f1
-; BE-NEXT: fmr f1, f2
-; BE-NEXT: std r30, 136(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
+; BE-NEXT: stdu r1, -144(r1)
+; BE-NEXT: std r0, 160(r1)
+; BE-NEXT: std r30, 128(r1) # 8-byte Folded Spill
; BE-NEXT: mr r30, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r30, 48
-; BE-NEXT: fmr f31, f1
+; BE-NEXT: clrldi r3, r4, 48
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
; BE-NEXT: std r3, 120(r1)
+; BE-NEXT: clrldi r3, r30, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
; BE-NEXT: std r3, 112(r1)
; BE-NEXT: addi r3, r1, 112
-; BE-NEXT: ld r30, 136(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f31, 152(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r30, 128(r1) # 8-byte Folded Reload
; BE-NEXT: lxvd2x v2, 0, r3
-; BE-NEXT: addi r1, r1, 160
+; BE-NEXT: addi r1, r1, 144
; BE-NEXT: ld r0, 16(r1)
; BE-NEXT: mtlr r0
; BE-NEXT: blr
@@ -112,35 +95,28 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -96(r1)
-; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: std r0, 112(r1)
-; CHECK-NEXT: std r30, 72(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f31, 88(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f31, f2
-; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
-; CHECK-NEXT: mr r30, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
+; CHECK-NEXT: li r5, 48
; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: std r0, 112(r1)
+; CHECK-NEXT: std r29, 72(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r30, 80(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r30, r4
+; CHECK-NEXT: stxvd2x v31, r1, r5 # 16-byte Folded Spill
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
+; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: nop
+; CHECK-NEXT: mr r29, r3
; CHECK-NEXT: clrldi r3, r30, 48
-; CHECK-NEXT: fmr f31, f1
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: bl llrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
-; CHECK-NEXT: mtvsrd v31, r3
+; CHECK-NEXT: mtvsrd v31, r29
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: lfd f31, 88(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r30, 72(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r30, 80(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r29, 72(r1) # 8-byte Folded Reload
; CHECK-NEXT: xxmrghd v2, vs0, v31
; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 96
@@ -151,35 +127,30 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind {
; FAST-LABEL: llrint_v1i64_v2f16:
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
-; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
-; FAST-NEXT: stdu r1, -48(r1)
-; FAST-NEXT: fmr f31, f1
-; FAST-NEXT: fmr f1, f2
-; FAST-NEXT: std r0, 64(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
+; FAST-NEXT: stdu r1, -80(r1)
+; FAST-NEXT: li r5, 48
; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: std r0, 96(r1)
+; FAST-NEXT: std r30, 64(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r30, r4
+; FAST-NEXT: stxvd2x v31, r1, r5 # 16-byte Folded Spill
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f30, f1
-; FAST-NEXT: fmr f1, f31
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: clrldi r3, r30, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
; FAST-NEXT: fctid f0, f1
-; FAST-NEXT: fctid f1, f30
+; FAST-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
; FAST-NEXT: mffprd r3, f0
; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: mffprd r3, f1
-; FAST-NEXT: mtfprd f1, r3
-; FAST-NEXT: xxmrghd v2, vs1, vs0
-; FAST-NEXT: addi r1, r1, 48
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: xxmrghd v2, vs0, v31
+; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: addi r1, r1, 80
; FAST-NEXT: ld r0, 16(r1)
-; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; FAST-NEXT: mtlr r0
; FAST-NEXT: blr
%a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x)
@@ -191,73 +162,46 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind {
; BE-LABEL: llrint_v4i64_v4f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
-; BE-NEXT: stdu r1, -208(r1)
-; BE-NEXT: std r0, 224(r1)
-; BE-NEXT: stfd f29, 184(r1) # 8-byte Folded Spill
-; BE-NEXT: fmr f29, f1
-; BE-NEXT: fmr f1, f2
-; BE-NEXT: std r28, 152(r1) # 8-byte Folded Spill
-; BE-NEXT: std r29, 160(r1) # 8-byte Folded Spill
-; BE-NEXT: std r30, 168(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f30, 192(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f31, 200(r1) # 8-byte Folded Spill
-; BE-NEXT: fmr f31, f4
-; BE-NEXT: fmr f30, f3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f29
-; BE-NEXT: mr r30, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
-; BE-NEXT: mr r29, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f30
+; BE-NEXT: stdu r1, -176(r1)
+; BE-NEXT: std r0, 192(r1)
+; BE-NEXT: std r28, 144(r1) # 8-byte Folded Spill
; BE-NEXT: mr r28, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: clrldi r3, r4, 48
+; BE-NEXT: std r29, 152(r1) # 8-byte Folded Spill
+; BE-NEXT: std r30, 160(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r6
+; BE-NEXT: mr r29, r5
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
+; BE-NEXT: bl llrintf
+; BE-NEXT: nop
+; BE-NEXT: std r3, 120(r1)
; BE-NEXT: clrldi r3, r28, 48
-; BE-NEXT: fmr f31, f1
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r29, 48
-; BE-NEXT: fmr f30, f1
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl llrintf
; BE-NEXT: nop
+; BE-NEXT: std r3, 112(r1)
; BE-NEXT: clrldi r3, r30, 48
-; BE-NEXT: fmr f29, f1
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f29
-; BE-NEXT: std r3, 120(r1)
-; BE-NEXT: bl llrintf
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f30
-; BE-NEXT: std r3, 112(r1)
-; BE-NEXT: bl llrintf
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
; BE-NEXT: std r3, 136(r1)
+; BE-NEXT: clrldi r3, r29, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
; BE-NEXT: std r3, 128(r1)
; BE-NEXT: addi r3, r1, 112
-; BE-NEXT: ld r30, 168(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f31, 200(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f30, 192(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f29, 184(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r29, 152(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r28, 144(r1) # 8-byte Folded Reload
; BE-NEXT: lxvd2x v2, 0, r3
; BE-NEXT: addi r3, r1, 128
-; BE-NEXT: ld r29, 160(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r28, 152(r1) # 8-byte Folded Reload
; BE-NEXT: lxvd2x v3, 0, r3
-; BE-NEXT: addi r1, r1, 208
+; BE-NEXT: addi r1, r1, 176
; BE-NEXT: ld r0, 16(r1)
; BE-NEXT: mtlr r0
; BE-NEXT: blr
@@ -265,79 +209,57 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind {
; CHECK-LABEL: llrint_v4i64_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stdu r1, -144(r1)
-; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: std r0, 160(r1)
-; CHECK-NEXT: std r28, 88(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r29, 96(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r30, 104(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f29, 120(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f29, f2
-; CHECK-NEXT: stfd f30, 128(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f30, f3
-; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f31, f4
-; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f29
-; CHECK-NEXT: mr r30, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f30
-; CHECK-NEXT: mr r29, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
-; CHECK-NEXT: mr r28, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
+; CHECK-NEXT: stdu r1, -128(r1)
+; CHECK-NEXT: li r7, 48
+; CHECK-NEXT: std r0, 144(r1)
; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: std r27, 88(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r28, 96(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r29, 104(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r29, r5
+; CHECK-NEXT: mr r28, r4
+; CHECK-NEXT: stxvd2x v30, r1, r7 # 16-byte Folded Spill
+; CHECK-NEXT: li r7, 64
+; CHECK-NEXT: std r30, 112(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r30, r6
+; CHECK-NEXT: stxvd2x v31, r1, r7 # 16-byte Folded Spill
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r28, 48
-; CHECK-NEXT: fmr f31, f1
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r29, 48
-; CHECK-NEXT: fmr f30, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r30, 48
-; CHECK-NEXT: fmr f29, f1
+; CHECK-NEXT: mr r27, r3
+; CHECK-NEXT: clrldi r3, r28, 48
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v31, r27
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f29
-; CHECK-NEXT: mtvsrd v31, r3
-; CHECK-NEXT: bl llrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f30
; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: clrldi r3, r29, 48
; CHECK-NEXT: xxmrghd v31, vs0, v31
+; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: nop
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
-; CHECK-NEXT: mtvsrd v30, r3
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: clrldi r3, r30, 48
+; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v30, r29
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: vmr v2, v31
-; CHECK-NEXT: lfd f31, 136(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f30, 128(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f29, 120(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r30, 104(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r29, 96(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r29, 104(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r28, 96(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r27, 88(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: ld r28, 88(r1) # 8-byte Folded Reload
; CHECK-NEXT: xxmrghd v3, vs0, v30
; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: addi r1, r1, 144
+; CHECK-NEXT: addi r1, r1, 128
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
@@ -345,63 +267,55 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind {
; FAST-LABEL: llrint_v4i64_v4f16:
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
-; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
-; FAST-NEXT: stdu r1, -64(r1)
-; FAST-NEXT: fmr f29, f1
-; FAST-NEXT: fmr f1, f4
-; FAST-NEXT: std r0, 80(r1)
-; FAST-NEXT: fmr f31, f3
-; FAST-NEXT: fmr f30, f2
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
+; FAST-NEXT: stdu r1, -112(r1)
+; FAST-NEXT: li r7, 48
+; FAST-NEXT: std r0, 128(r1)
; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: std r28, 80(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r29, 88(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r30, 96(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r30, r6
+; FAST-NEXT: mr r29, r5
+; FAST-NEXT: stxvd2x v30, r1, r7 # 16-byte Folded Spill
+; FAST-NEXT: li r7, 64
+; FAST-NEXT: mr r28, r4
+; FAST-NEXT: stxvd2x v31, r1, r7 # 16-byte Folded Spill
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f28, f1
-; FAST-NEXT: fmr f1, f31
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: clrldi r3, r28, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f31, f1
-; FAST-NEXT: fmr f1, f30
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: clrldi r3, r29, 48
+; FAST-NEXT: xxmrghd v31, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f30, f1
-; FAST-NEXT: fmr f1, f29
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v30, r3
+; FAST-NEXT: clrldi r3, r30, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fctid f0, f30
-; FAST-NEXT: fctid f2, f31
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: fctid f1, f1
-; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: mffprd r3, f2
-; FAST-NEXT: mtfprd f2, r3
-; FAST-NEXT: mffprd r3, f1
-; FAST-NEXT: mtfprd f1, r3
-; FAST-NEXT: xxmrghd v2, vs0, vs1
-; FAST-NEXT: fctid f0, f28
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: vmr v2, v31
+; FAST-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r29, 88(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r28, 80(r1) # 8-byte Folded Reload
; FAST-NEXT: mffprd r3, f0
; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: xxmrghd v3, vs0, vs2
-; FAST-NEXT: addi r1, r1, 64
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: xxmrghd v3, vs0, v30
+; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: addi r1, r1, 112
; FAST-NEXT: ld r0, 16(r1)
-; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; FAST-NEXT: mtlr r0
-; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload
; FAST-NEXT: blr
%a = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> %x)
ret <4 x i64> %a
@@ -412,145 +326,86 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind {
; BE-LABEL: llrint_v8i64_v8f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
-; BE-NEXT: stdu r1, -304(r1)
-; BE-NEXT: std r0, 320(r1)
-; BE-NEXT: stfd f25, 248(r1) # 8-byte Folded Spill
-; BE-NEXT: fmr f25, f1
-; BE-NEXT: fmr f1, f2
-; BE-NEXT: std r24, 184(r1) # 8-byte Folded Spill
-; BE-NEXT: std r25, 192(r1) # 8-byte Folded Spill
-; BE-NEXT: std r26, 200(r1) # 8-byte Folded Spill
-; BE-NEXT: std r27, 208(r1) # 8-byte Folded Spill
-; BE-NEXT: std r28, 216(r1) # 8-byte Folded Spill
-; BE-NEXT: std r29, 224(r1) # 8-byte Folded Spill
-; BE-NEXT: std r30, 232(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f26, 256(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f27, 264(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f28, 272(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f29, 280(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f30, 288(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f31, 296(r1) # 8-byte Folded Spill
-; BE-NEXT: fmr f31, f8
-; BE-NEXT: fmr f30, f7
-; BE-NEXT: fmr f29, f6
-; BE-NEXT: fmr f28, f5
-; BE-NEXT: fmr f27, f4
-; BE-NEXT: fmr f26, f3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f25
-; BE-NEXT: mr r30, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f27
-; BE-NEXT: mr r29, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f26
-; BE-NEXT: mr r28, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f29
-; BE-NEXT: mr r27, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f28
-; BE-NEXT: mr r26, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
-; BE-NEXT: mr r25, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f30
+; BE-NEXT: stdu r1, -240(r1)
+; BE-NEXT: std r0, 256(r1)
+; BE-NEXT: std r24, 176(r1) # 8-byte Folded Spill
; BE-NEXT: mr r24, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: clrldi r3, r4, 48
+; BE-NEXT: std r25, 184(r1) # 8-byte Folded Spill
+; BE-NEXT: std r26, 192(r1) # 8-byte Folded Spill
+; BE-NEXT: std r27, 200(r1) # 8-byte Folded Spill
+; BE-NEXT: std r28, 208(r1) # 8-byte Folded Spill
+; BE-NEXT: std r29, 216(r1) # 8-byte Folded Spill
+; BE-NEXT: std r30, 224(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r29, r10
+; BE-NEXT: mr r30, r9
+; BE-NEXT: mr r27, r8
+; BE-NEXT: mr r28, r7
+; BE-NEXT: mr r26, r6
+; BE-NEXT: mr r25, r5
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
+; BE-NEXT: bl llrintf
+; BE-NEXT: nop
+; BE-NEXT: std r3, 120(r1)
; BE-NEXT: clrldi r3, r24, 48
-; BE-NEXT: fmr f31, f1
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r25, 48
-; BE-NEXT: fmr f30, f1
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl llrintf
; BE-NEXT: nop
+; BE-NEXT: std r3, 112(r1)
; BE-NEXT: clrldi r3, r26, 48
-; BE-NEXT: fmr f29, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r27, 48
-; BE-NEXT: fmr f28, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r28, 48
-; BE-NEXT: fmr f27, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r29, 48
-; BE-NEXT: fmr f26, f1
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r30, 48
-; BE-NEXT: fmr f25, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: bl llrintf
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f25
-; BE-NEXT: std r3, 120(r1)
-; BE-NEXT: bl llrintf
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f26
-; BE-NEXT: std r3, 112(r1)
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f27
; BE-NEXT: std r3, 136(r1)
+; BE-NEXT: clrldi r3, r25, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f28
; BE-NEXT: std r3, 128(r1)
+; BE-NEXT: clrldi r3, r27, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f29
; BE-NEXT: std r3, 152(r1)
+; BE-NEXT: clrldi r3, r28, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f30
; BE-NEXT: std r3, 144(r1)
+; BE-NEXT: clrldi r3, r29, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
; BE-NEXT: std r3, 168(r1)
+; BE-NEXT: clrldi r3, r30, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
; BE-NEXT: std r3, 160(r1)
; BE-NEXT: addi r3, r1, 112
-; BE-NEXT: ld r30, 232(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f31, 296(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f30, 288(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f29, 280(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r30, 224(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r29, 216(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r28, 208(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r27, 200(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r26, 192(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r25, 184(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r24, 176(r1) # 8-byte Folded Reload
; BE-NEXT: lxvd2x v2, 0, r3
; BE-NEXT: addi r3, r1, 128
-; BE-NEXT: lfd f28, 272(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f27, 264(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f26, 256(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r29, 224(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r28, 216(r1) # 8-byte Folded Reload
; BE-NEXT: lxvd2x v3, 0, r3
; BE-NEXT: addi r3, r1, 144
-; BE-NEXT: lfd f25, 248(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r27, 208(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r26, 200(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r25, 192(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r24, 184(r1) # 8-byte Folded Reload
; BE-NEXT: lxvd2x v4, 0, r3
; BE-NEXT: addi r3, r1, 160
; BE-NEXT: lxvd2x v5, 0, r3
-; BE-NEXT: addi r1, r1, 304
+; BE-NEXT: addi r1, r1, 240
; BE-NEXT: ld r0, 16(r1)
; BE-NEXT: mtlr r0
; BE-NEXT: blr
@@ -558,159 +413,107 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind {
; CHECK-LABEL: llrint_v8i64_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stdu r1, -240(r1)
-; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: std r0, 256(r1)
-; CHECK-NEXT: std r24, 120(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r25, 128(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r26, 136(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r27, 144(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r28, 152(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r29, 160(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: std r30, 168(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f25, 184(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f25, f2
-; CHECK-NEXT: stfd f26, 192(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f26, f3
-; CHECK-NEXT: stfd f27, 200(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f27, f4
-; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 80
-; CHECK-NEXT: stfd f28, 208(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f28, f5
-; CHECK-NEXT: stfd f29, 216(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f29, f6
-; CHECK-NEXT: stfd f30, 224(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f30, f7
-; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 96
-; CHECK-NEXT: stfd f31, 232(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f31, f8
-; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f25
-; CHECK-NEXT: mr r30, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f26
-; CHECK-NEXT: mr r29, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f27
-; CHECK-NEXT: mr r28, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f28
-; CHECK-NEXT: mr r27, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f29
-; CHECK-NEXT: mr r26, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f30
-; CHECK-NEXT: mr r25, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
-; CHECK-NEXT: mr r24, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
+; CHECK-NEXT: stdu r1, -192(r1)
+; CHECK-NEXT: li r11, 48
+; CHECK-NEXT: std r0, 208(r1)
; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: std r23, 120(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r24, 128(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r25, 136(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r25, r5
+; CHECK-NEXT: mr r24, r4
+; CHECK-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 64
+; CHECK-NEXT: std r26, 144(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r26, r6
+; CHECK-NEXT: std r27, 152(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r28, 160(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r28, r8
+; CHECK-NEXT: mr r27, r7
+; CHECK-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 80
+; CHECK-NEXT: std r29, 168(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r29, r9
+; CHECK-NEXT: std r30, 176(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r30, r10
+; CHECK-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 96
+; CHECK-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r24, 48
-; CHECK-NEXT: fmr f31, f1
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r25, 48
-; CHECK-NEXT: fmr f30, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r26, 48
-; CHECK-NEXT: fmr f29, f1
+; CHECK-NEXT: mr r23, r3
+; CHECK-NEXT: clrldi r3, r24, 48
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r27, 48
-; CHECK-NEXT: fmr f28, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: mtvsrd v31, r23
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r28, 48
-; CHECK-NEXT: fmr f27, f1
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: clrldi r3, r25, 48
+; CHECK-NEXT: xxmrghd v31, vs0, v31
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r29, 48
-; CHECK-NEXT: fmr f26, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r30, 48
-; CHECK-NEXT: fmr f25, f1
+; CHECK-NEXT: mr r25, r3
+; CHECK-NEXT: clrldi r3, r26, 48
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v30, r25
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f25
-; CHECK-NEXT: mtvsrd v31, r3
-; CHECK-NEXT: bl llrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f26
; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v31, vs0, v31
-; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: clrldi r3, r27, 48
+; CHECK-NEXT: xxmrghd v30, vs0, v30
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f27
-; CHECK-NEXT: mtvsrd v30, r3
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f28
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v30, vs0, v30
-; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: mr r27, r3
+; CHECK-NEXT: clrldi r3, r28, 48
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f29
-; CHECK-NEXT: mtvsrd v29, r3
+; CHECK-NEXT: mtvsrd v29, r27
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f30
; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: clrldi r3, r29, 48
; CHECK-NEXT: xxmrghd v29, vs0, v29
+; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: nop
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
-; CHECK-NEXT: mtvsrd v28, r3
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: clrldi r3, r30, 48
+; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v28, r29
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: li r3, 96
; CHECK-NEXT: vmr v2, v31
-; CHECK-NEXT: lfd f31, 232(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r30, 176(r1) # 8-byte Folded Reload
; CHECK-NEXT: vmr v3, v30
; CHECK-NEXT: vmr v4, v29
-; CHECK-NEXT: lfd f30, 224(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f29, 216(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r29, 168(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r28, 160(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 80
-; CHECK-NEXT: lfd f28, 208(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f27, 200(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f26, 192(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f25, 184(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r30, 168(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r29, 160(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r27, 152(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r26, 144(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r25, 136(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r24, 128(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r23, 120(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: ld r28, 152(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r27, 144(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxmrghd v5, vs0, v28
-; CHECK-NEXT: ld r26, 136(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r25, 128(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r24, 120(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: xxmrghd v5, vs0, v28
; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: addi r1, r1, 240
+; CHECK-NEXT: addi r1, r1, 192
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
@@ -718,117 +521,103 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind {
; FAST-LABEL: llrint_v8i64_v8f16:
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
-; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
-; FAST-NEXT: stdu r1, -96(r1)
-; FAST-NEXT: fmr f24, f1
-; FAST-NEXT: fmr f1, f8
-; FAST-NEXT: std r0, 112(r1)
-; FAST-NEXT: fmr f30, f7
-; FAST-NEXT: fmr f29, f6
-; FAST-NEXT: fmr f28, f5
-; FAST-NEXT: fmr f27, f4
-; FAST-NEXT: fmr f26, f3
-; FAST-NEXT: fmr f25, f2
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
+; FAST-NEXT: stdu r1, -176(r1)
+; FAST-NEXT: li r11, 48
+; FAST-NEXT: std r0, 192(r1)
; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: std r24, 112(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r25, 120(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r26, 128(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r26, r6
+; FAST-NEXT: mr r25, r5
+; FAST-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 64
+; FAST-NEXT: std r27, 136(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r27, r7
+; FAST-NEXT: std r28, 144(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r29, 152(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r29, r9
+; FAST-NEXT: mr r28, r8
+; FAST-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 80
+; FAST-NEXT: std r30, 160(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r30, r10
+; FAST-NEXT: mr r24, r4
+; FAST-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 96
+; FAST-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f31, f1
-; FAST-NEXT: fmr f1, f30
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: clrldi r3, r24, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f30, f1
-; FAST-NEXT: fmr f1, f29
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: clrldi r3, r25, 48
+; FAST-NEXT: xxmrghd v31, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f29, f1
-; FAST-NEXT: fmr f1, f28
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v30, r3
+; FAST-NEXT: clrldi r3, r26, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f28, f1
-; FAST-NEXT: fmr f1, f27
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: clrldi r3, r27, 48
+; FAST-NEXT: xxmrghd v30, vs0, v30
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f27, f1
-; FAST-NEXT: fmr f1, f26
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v29, r3
+; FAST-NEXT: clrldi r3, r28, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f26, f1
-; FAST-NEXT: fmr f1, f25
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: clrldi r3, r29, 48
+; FAST-NEXT: xxmrghd v29, vs0, v29
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f25, f1
-; FAST-NEXT: fmr f1, f24
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v28, r3
+; FAST-NEXT: clrldi r3, r30, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fctid f0, f25
-; FAST-NEXT: fctid f2, f26
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: fctid f3, f27
-; FAST-NEXT: fctid f4, f28
-; FAST-NEXT: fctid f5, f29
-; FAST-NEXT: fctid f6, f30
-; FAST-NEXT: fctid f1, f1
-; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: mffprd r3, f2
-; FAST-NEXT: mtfprd f2, r3
-; FAST-NEXT: mffprd r3, f3
-; FAST-NEXT: mtfprd f3, r3
-; FAST-NEXT: mffprd r3, f4
-; FAST-NEXT: mtfprd f4, r3
-; FAST-NEXT: mffprd r3, f5
-; FAST-NEXT: mtfprd f5, r3
-; FAST-NEXT: mffprd r3, f6
-; FAST-NEXT: mtfprd f6, r3
-; FAST-NEXT: mffprd r3, f1
-; FAST-NEXT: mtfprd f1, r3
-; FAST-NEXT: xxmrghd v3, vs3, vs2
-; FAST-NEXT: xxmrghd v4, vs5, vs4
-; FAST-NEXT: xxmrghd v2, vs0, vs1
-; FAST-NEXT: fctid f0, f31
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: vmr v2, v31
+; FAST-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r29, 152(r1) # 8-byte Folded Reload
+; FAST-NEXT: vmr v3, v30
+; FAST-NEXT: vmr v4, v29
+; FAST-NEXT: ld r28, 144(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r27, 136(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r26, 128(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r25, 120(r1) # 8-byte Folded Reload
; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: ld r24, 112(r1) # 8-byte Folded Reload
; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: xxmrghd v5, vs0, vs6
-; FAST-NEXT: addi r1, r1, 96
+; FAST-NEXT: li r3, 96
+; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 80
+; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: xxmrghd v5, vs0, v28
+; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: addi r1, r1, 176
; FAST-NEXT: ld r0, 16(r1)
-; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; FAST-NEXT: mtlr r0
-; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f25, -56(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f24, -64(r1) # 8-byte Folded Reload
; FAST-NEXT: blr
%a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x)
ret <8 x i64> %a
@@ -839,286 +628,166 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind {
; BE-LABEL: llrint_v16i64_v16f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
-; BE-NEXT: stdu r1, -496(r1)
-; BE-NEXT: std r0, 512(r1)
-; BE-NEXT: stfd f20, 400(r1) # 8-byte Folded Spill
-; BE-NEXT: fmr f20, f1
-; BE-NEXT: fmr f1, f2
-; BE-NEXT: std r16, 248(r1) # 8-byte Folded Spill
-; BE-NEXT: std r17, 256(r1) # 8-byte Folded Spill
-; BE-NEXT: std r18, 264(r1) # 8-byte Folded Spill
-; BE-NEXT: std r19, 272(r1) # 8-byte Folded Spill
-; BE-NEXT: std r20, 280(r1) # 8-byte Folded Spill
-; BE-NEXT: std r21, 288(r1) # 8-byte Folded Spill
-; BE-NEXT: std r22, 296(r1) # 8-byte Folded Spill
-; BE-NEXT: std r23, 304(r1) # 8-byte Folded Spill
-; BE-NEXT: std r24, 312(r1) # 8-byte Folded Spill
-; BE-NEXT: std r25, 320(r1) # 8-byte Folded Spill
-; BE-NEXT: std r26, 328(r1) # 8-byte Folded Spill
-; BE-NEXT: std r27, 336(r1) # 8-byte Folded Spill
-; BE-NEXT: std r28, 344(r1) # 8-byte Folded Spill
-; BE-NEXT: std r29, 352(r1) # 8-byte Folded Spill
-; BE-NEXT: std r30, 360(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f17, 376(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f18, 384(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f19, 392(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f21, 408(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f22, 416(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f23, 424(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f24, 432(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f25, 440(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f26, 448(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f27, 456(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f28, 464(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f29, 472(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f30, 480(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f31, 488(r1) # 8-byte Folded Spill
-; BE-NEXT: fmr f31, f13
-; BE-NEXT: fmr f29, f12
-; BE-NEXT: fmr f30, f11
-; BE-NEXT: fmr f28, f10
-; BE-NEXT: fmr f27, f9
-; BE-NEXT: fmr f26, f8
-; BE-NEXT: fmr f25, f7
-; BE-NEXT: fmr f24, f6
-; BE-NEXT: fmr f23, f5
-; BE-NEXT: fmr f22, f4
-; BE-NEXT: fmr f21, f3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f20
-; BE-NEXT: mr r30, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f22
-; BE-NEXT: mr r29, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f21
-; BE-NEXT: mr r28, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f24
-; BE-NEXT: mr r27, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f23
-; BE-NEXT: mr r26, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f26
-; BE-NEXT: mr r25, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f25
+; BE-NEXT: stdu r1, -368(r1)
+; BE-NEXT: std r0, 384(r1)
+; BE-NEXT: std r24, 304(r1) # 8-byte Folded Spill
; BE-NEXT: mr r24, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f28
-; BE-NEXT: mr r23, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f27
-; BE-NEXT: mr r22, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f29
-; BE-NEXT: mr r21, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f30
-; BE-NEXT: mr r20, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 652(r1)
-; BE-NEXT: mr r19, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
-; BE-NEXT: mr r18, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 668(r1)
-; BE-NEXT: mr r17, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 660(r1)
-; BE-NEXT: mr r16, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r16, 48
-; BE-NEXT: fmr f31, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r17, 48
-; BE-NEXT: fmr f30, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r18, 48
-; BE-NEXT: fmr f29, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r19, 48
-; BE-NEXT: fmr f28, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r20, 48
-; BE-NEXT: fmr f27, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r21, 48
-; BE-NEXT: fmr f26, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r22, 48
-; BE-NEXT: fmr f25, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r23, 48
-; BE-NEXT: fmr f24, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r24, 48
-; BE-NEXT: fmr f23, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r25, 48
-; BE-NEXT: fmr f22, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r26, 48
-; BE-NEXT: fmr f21, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r27, 48
-; BE-NEXT: fmr f20, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r28, 48
-; BE-NEXT: fmr f19, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r29, 48
-; BE-NEXT: fmr f18, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r30, 48
-; BE-NEXT: fmr f17, f1
+; BE-NEXT: lhz r3, 494(r1)
+; BE-NEXT: std r16, 240(r1) # 8-byte Folded Spill
+; BE-NEXT: lhz r16, 486(r1)
+; BE-NEXT: std r17, 248(r1) # 8-byte Folded Spill
+; BE-NEXT: std r18, 256(r1) # 8-byte Folded Spill
+; BE-NEXT: std r19, 264(r1) # 8-byte Folded Spill
+; BE-NEXT: std r20, 272(r1) # 8-byte Folded Spill
+; BE-NEXT: std r21, 280(r1) # 8-byte Folded Spill
+; BE-NEXT: std r22, 288(r1) # 8-byte Folded Spill
+; BE-NEXT: lhz r22, 534(r1)
+; BE-NEXT: lhz r21, 542(r1)
+; BE-NEXT: lhz r20, 518(r1)
+; BE-NEXT: lhz r19, 526(r1)
+; BE-NEXT: lhz r18, 502(r1)
+; BE-NEXT: lhz r17, 510(r1)
+; BE-NEXT: std r23, 296(r1) # 8-byte Folded Spill
+; BE-NEXT: std r25, 312(r1) # 8-byte Folded Spill
+; BE-NEXT: std r26, 320(r1) # 8-byte Folded Spill
+; BE-NEXT: std r27, 328(r1) # 8-byte Folded Spill
+; BE-NEXT: std r28, 336(r1) # 8-byte Folded Spill
+; BE-NEXT: std r29, 344(r1) # 8-byte Folded Spill
+; BE-NEXT: std r30, 352(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r29, r10
+; BE-NEXT: mr r30, r9
+; BE-NEXT: mr r27, r8
+; BE-NEXT: mr r28, r7
+; BE-NEXT: mr r25, r6
+; BE-NEXT: mr r26, r5
+; BE-NEXT: mr r23, r4
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f17
; BE-NEXT: std r3, 120(r1)
+; BE-NEXT: mr r3, r16
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f18
; BE-NEXT: std r3, 112(r1)
+; BE-NEXT: mr r3, r17
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f19
; BE-NEXT: std r3, 136(r1)
+; BE-NEXT: mr r3, r18
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f20
; BE-NEXT: std r3, 128(r1)
+; BE-NEXT: mr r3, r19
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f21
; BE-NEXT: std r3, 152(r1)
+; BE-NEXT: mr r3, r20
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f22
; BE-NEXT: std r3, 144(r1)
+; BE-NEXT: mr r3, r21
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f23
; BE-NEXT: std r3, 168(r1)
+; BE-NEXT: mr r3, r22
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f24
; BE-NEXT: std r3, 160(r1)
+; BE-NEXT: clrldi r3, r23, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f25
; BE-NEXT: std r3, 184(r1)
+; BE-NEXT: clrldi r3, r24, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f26
; BE-NEXT: std r3, 176(r1)
+; BE-NEXT: clrldi r3, r25, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f27
; BE-NEXT: std r3, 200(r1)
+; BE-NEXT: clrldi r3, r26, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f28
; BE-NEXT: std r3, 192(r1)
+; BE-NEXT: clrldi r3, r27, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f29
; BE-NEXT: std r3, 216(r1)
+; BE-NEXT: clrldi r3, r28, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f30
; BE-NEXT: std r3, 208(r1)
+; BE-NEXT: clrldi r3, r29, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
; BE-NEXT: std r3, 232(r1)
+; BE-NEXT: clrldi r3, r30, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
; BE-NEXT: std r3, 224(r1)
; BE-NEXT: addi r3, r1, 112
-; BE-NEXT: ld r30, 360(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f31, 488(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f30, 480(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f29, 472(r1) # 8-byte Folded Reload
-; BE-NEXT: lxvd2x v2, 0, r3
+; BE-NEXT: ld r30, 352(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r29, 344(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r28, 336(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r27, 328(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r26, 320(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r25, 312(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r24, 304(r1) # 8-byte Folded Reload
+; BE-NEXT: lxvd2x v6, 0, r3
; BE-NEXT: addi r3, r1, 128
-; BE-NEXT: lfd f28, 464(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f27, 456(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f26, 448(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r29, 352(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r28, 344(r1) # 8-byte Folded Reload
-; BE-NEXT: lxvd2x v3, 0, r3
+; BE-NEXT: ld r23, 296(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r22, 288(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r21, 280(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r20, 272(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r19, 264(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r18, 256(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r17, 248(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r16, 240(r1) # 8-byte Folded Reload
+; BE-NEXT: lxvd2x v7, 0, r3
; BE-NEXT: addi r3, r1, 144
-; BE-NEXT: lfd f25, 440(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f24, 432(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f23, 424(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r27, 336(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r26, 328(r1) # 8-byte Folded Reload
-; BE-NEXT: lxvd2x v4, 0, r3
+; BE-NEXT: lxvd2x v8, 0, r3
; BE-NEXT: addi r3, r1, 160
-; BE-NEXT: lfd f22, 416(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f21, 408(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f20, 400(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r25, 320(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r24, 312(r1) # 8-byte Folded Reload
-; BE-NEXT: lxvd2x v5, 0, r3
+; BE-NEXT: lxvd2x v9, 0, r3
; BE-NEXT: addi r3, r1, 176
-; BE-NEXT: lfd f19, 392(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f18, 384(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f17, 376(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r23, 304(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r22, 296(r1) # 8-byte Folded Reload
-; BE-NEXT: lxvd2x v6, 0, r3
+; BE-NEXT: lxvd2x v2, 0, r3
; BE-NEXT: addi r3, r1, 192
-; BE-NEXT: ld r21, 288(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r20, 280(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r19, 272(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r18, 264(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r17, 256(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r16, 248(r1) # 8-byte Folded Reload
-; BE-NEXT: lxvd2x v7, 0, r3
+; BE-NEXT: lxvd2x v3, 0, r3
; BE-NEXT: addi r3, r1, 208
-; BE-NEXT: lxvd2x v8, 0, r3
+; BE-NEXT: lxvd2x v4, 0, r3
; BE-NEXT: addi r3, r1, 224
-; BE-NEXT: lxvd2x v9, 0, r3
-; BE-NEXT: addi r1, r1, 496
+; BE-NEXT: lxvd2x v5, 0, r3
+; BE-NEXT: addi r1, r1, 368
; BE-NEXT: ld r0, 16(r1)
; BE-NEXT: mtlr r0
; BE-NEXT: blr
@@ -1126,316 +795,207 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind {
; CHECK-LABEL: llrint_v16i64_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stdu r1, -432(r1)
-; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: std r0, 448(r1)
-; CHECK-NEXT: std r16, 184(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r17, 192(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r18, 200(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r19, 208(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r20, 216(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r21, 224(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: std r22, 232(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r23, 240(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r24, 248(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r25, 256(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r26, 264(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r27, 272(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 80
-; CHECK-NEXT: std r28, 280(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r29, 288(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r30, 296(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f17, 312(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f18, 320(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f19, 328(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 96
-; CHECK-NEXT: stfd f20, 336(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f20, f2
-; CHECK-NEXT: stfd f21, 344(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f21, f3
-; CHECK-NEXT: stfd f22, 352(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f22, f4
-; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 112
-; CHECK-NEXT: stfd f23, 360(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f23, f5
-; CHECK-NEXT: stfd f24, 368(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f24, f6
-; CHECK-NEXT: stfd f25, 376(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f25, f7
-; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 128
-; CHECK-NEXT: stfd f26, 384(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f26, f8
-; CHECK-NEXT: stfd f27, 392(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f27, f9
-; CHECK-NEXT: stfd f28, 400(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f28, f10
-; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 144
-; CHECK-NEXT: stfd f29, 408(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f29, f11
-; CHECK-NEXT: stfd f30, 416(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f30, f12
-; CHECK-NEXT: stfd f31, 424(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f31, f13
-; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 160
-; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f20
-; CHECK-NEXT: mr r30, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f21
-; CHECK-NEXT: mr r29, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f22
-; CHECK-NEXT: mr r28, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f23
-; CHECK-NEXT: mr r27, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f24
-; CHECK-NEXT: mr r26, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f25
-; CHECK-NEXT: mr r25, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f26
-; CHECK-NEXT: mr r24, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f27
+; CHECK-NEXT: stdu r1, -320(r1)
+; CHECK-NEXT: li r11, 48
+; CHECK-NEXT: std r0, 336(r1)
+; CHECK-NEXT: std r23, 248(r1) # 8-byte Folded Spill
; CHECK-NEXT: mr r23, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: lhz r3, 416(r1)
+; CHECK-NEXT: std r16, 192(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r17, 200(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r17, 432(r1)
+; CHECK-NEXT: stxvd2x v24, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 64
+; CHECK-NEXT: std r18, 208(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r18, 440(r1)
+; CHECK-NEXT: std r19, 216(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r19, 448(r1)
+; CHECK-NEXT: lhz r16, 424(r1)
+; CHECK-NEXT: stxvd2x v25, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 80
+; CHECK-NEXT: std r20, 224(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r20, 456(r1)
+; CHECK-NEXT: stxvd2x v26, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 96
+; CHECK-NEXT: std r21, 232(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r22, 240(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r22, 472(r1)
+; CHECK-NEXT: lhz r21, 464(r1)
+; CHECK-NEXT: stxvd2x v27, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 112
+; CHECK-NEXT: std r15, 184(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r24, 256(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r25, 264(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r25, r5
+; CHECK-NEXT: mr r24, r4
+; CHECK-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 128
+; CHECK-NEXT: std r26, 272(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r27, 280(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r28, 288(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r29, 296(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r29, r9
+; CHECK-NEXT: mr r28, r8
+; CHECK-NEXT: mr r27, r7
+; CHECK-NEXT: mr r26, r6
+; CHECK-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 144
+; CHECK-NEXT: std r30, 304(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r30, r10
+; CHECK-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 160
+; CHECK-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f28
-; CHECK-NEXT: mr r22, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f29
-; CHECK-NEXT: mr r21, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: mr r15, r3
+; CHECK-NEXT: mr r3, r16
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f30
-; CHECK-NEXT: mr r20, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: mtvsrd v31, r15
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
-; CHECK-NEXT: mr r19, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: mr r3, r17
+; CHECK-NEXT: xxmrghd v31, vs0, v31
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 568(r1)
-; CHECK-NEXT: mr r18, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 576(r1)
; CHECK-NEXT: mr r17, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: mr r3, r18
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 584(r1)
-; CHECK-NEXT: mr r16, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: mtvsrd v30, r17
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: mr r3, r19
+; CHECK-NEXT: xxmrghd v30, vs0, v30
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r16, 48
-; CHECK-NEXT: fmr f31, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r17, 48
-; CHECK-NEXT: fmr f30, f1
+; CHECK-NEXT: mr r19, r3
+; CHECK-NEXT: mr r3, r20
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r18, 48
-; CHECK-NEXT: fmr f29, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: mtvsrd v29, r19
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r19, 48
-; CHECK-NEXT: fmr f28, f1
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: mr r3, r21
+; CHECK-NEXT: xxmrghd v29, vs0, v29
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r20, 48
-; CHECK-NEXT: fmr f27, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r21, 48
-; CHECK-NEXT: fmr f26, f1
+; CHECK-NEXT: mr r21, r3
+; CHECK-NEXT: mr r3, r22
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r22, 48
-; CHECK-NEXT: fmr f25, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: mtvsrd v28, r21
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: clrldi r3, r23, 48
-; CHECK-NEXT: fmr f24, f1
+; CHECK-NEXT: xxmrghd v28, vs0, v28
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
+; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: nop
+; CHECK-NEXT: mr r23, r3
; CHECK-NEXT: clrldi r3, r24, 48
-; CHECK-NEXT: fmr f23, f1
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v27, r23
+; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: clrldi r3, r25, 48
-; CHECK-NEXT: fmr f22, f1
+; CHECK-NEXT: xxmrghd v27, vs0, v27
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
+; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: nop
+; CHECK-NEXT: mr r25, r3
; CHECK-NEXT: clrldi r3, r26, 48
-; CHECK-NEXT: fmr f21, f1
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v26, r25
+; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: clrldi r3, r27, 48
-; CHECK-NEXT: fmr f20, f1
+; CHECK-NEXT: xxmrghd v26, vs0, v26
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
+; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: nop
+; CHECK-NEXT: mr r27, r3
; CHECK-NEXT: clrldi r3, r28, 48
-; CHECK-NEXT: fmr f19, f1
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r29, 48
-; CHECK-NEXT: fmr f18, f1
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r30, 48
-; CHECK-NEXT: fmr f17, f1
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: bl llrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f17
-; CHECK-NEXT: mtvsrd v31, r3
-; CHECK-NEXT: bl llrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f18
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v31, vs0, v31
-; CHECK-NEXT: bl llrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f19
-; CHECK-NEXT: mtvsrd v30, r3
-; CHECK-NEXT: bl llrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f20
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v30, vs0, v30
-; CHECK-NEXT: bl llrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f21
-; CHECK-NEXT: mtvsrd v29, r3
-; CHECK-NEXT: bl llrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f22
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v29, vs0, v29
-; CHECK-NEXT: bl llrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f23
-; CHECK-NEXT: mtvsrd v28, r3
-; CHECK-NEXT: bl llrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f24
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v28, vs0, v28
-; CHECK-NEXT: bl llrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f25
-; CHECK-NEXT: mtvsrd v27, r3
-; CHECK-NEXT: bl llrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f26
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v27, vs0, v27
-; CHECK-NEXT: bl llrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f27
-; CHECK-NEXT: mtvsrd v26, r3
+; CHECK-NEXT: mtvsrd v25, r27
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f28
; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v26, vs0, v26
-; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: clrldi r3, r29, 48
+; CHECK-NEXT: xxmrghd v25, vs0, v25
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f29
-; CHECK-NEXT: mtvsrd v25, r3
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f30
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v25, vs0, v25
-; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: clrldi r3, r30, 48
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
-; CHECK-NEXT: mtvsrd v24, r3
+; CHECK-NEXT: mtvsrd v24, r29
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: li r3, 160
-; CHECK-NEXT: vmr v2, v31
-; CHECK-NEXT: lfd f31, 424(r1) # 8-byte Folded Reload
-; CHECK-NEXT: vmr v3, v30
-; CHECK-NEXT: vmr v4, v29
-; CHECK-NEXT: lfd f30, 416(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f29, 408(r1) # 8-byte Folded Reload
+; CHECK-NEXT: vmr v6, v31
+; CHECK-NEXT: ld r30, 304(r1) # 8-byte Folded Reload
+; CHECK-NEXT: vmr v7, v30
+; CHECK-NEXT: vmr v8, v29
+; CHECK-NEXT: ld r29, 296(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r28, 288(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 144
-; CHECK-NEXT: vmr v5, v28
-; CHECK-NEXT: vmr v6, v27
-; CHECK-NEXT: vmr v7, v26
-; CHECK-NEXT: vmr v8, v25
-; CHECK-NEXT: lfd f28, 400(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f27, 392(r1) # 8-byte Folded Reload
+; CHECK-NEXT: vmr v9, v28
+; CHECK-NEXT: vmr v2, v27
+; CHECK-NEXT: vmr v3, v26
+; CHECK-NEXT: vmr v4, v25
+; CHECK-NEXT: ld r27, 280(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r26, 272(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 128
-; CHECK-NEXT: lfd f26, 384(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f25, 376(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxmrghd v9, vs0, v24
-; CHECK-NEXT: lfd f24, 368(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f23, 360(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f22, 352(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r25, 264(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r24, 256(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghd v5, vs0, v24
+; CHECK-NEXT: ld r23, 248(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r22, 240(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r21, 232(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 112
-; CHECK-NEXT: lfd f21, 344(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r30, 296(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f20, 336(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f19, 328(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r29, 288(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r28, 280(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r20, 224(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r19, 216(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r18, 208(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r17, 200(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r16, 192(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r15, 184(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 96
-; CHECK-NEXT: lfd f18, 320(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r27, 272(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f17, 312(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r26, 264(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r25, 256(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r24, 248(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 80
-; CHECK-NEXT: ld r23, 240(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r22, 232(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r21, 224(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r20, 216(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r19, 208(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r18, 200(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: ld r17, 192(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r16, 184(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: addi r1, r1, 432
+; CHECK-NEXT: addi r1, r1, 320
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
@@ -1443,223 +1003,199 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind {
; FAST-LABEL: llrint_v16i64_v16f16:
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
-; FAST-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f19, -104(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f20, -96(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f21, -88(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f22, -80(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f23, -72(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
-; FAST-NEXT: stdu r1, -160(r1)
-; FAST-NEXT: fmr f26, f1
-; FAST-NEXT: lfs f1, 312(r1)
-; FAST-NEXT: std r0, 176(r1)
-; FAST-NEXT: fmr f28, f13
-; FAST-NEXT: fmr f27, f12
-; FAST-NEXT: fmr f24, f11
-; FAST-NEXT: fmr f21, f10
-; FAST-NEXT: fmr f19, f9
-; FAST-NEXT: fmr f18, f8
-; FAST-NEXT: fmr f17, f7
-; FAST-NEXT: fmr f16, f6
-; FAST-NEXT: fmr f20, f5
-; FAST-NEXT: fmr f22, f4
-; FAST-NEXT: fmr f23, f3
-; FAST-NEXT: fmr f25, f2
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: stdu r1, -304(r1)
+; FAST-NEXT: li r11, 48
+; FAST-NEXT: std r0, 320(r1)
+; FAST-NEXT: std r23, 232(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r23, r3
+; FAST-NEXT: lhz r3, 400(r1)
+; FAST-NEXT: std r16, 176(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r17, 184(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r17, 416(r1)
+; FAST-NEXT: stxvd2x v24, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 64
+; FAST-NEXT: std r18, 192(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r18, 424(r1)
+; FAST-NEXT: std r19, 200(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r19, 432(r1)
+; FAST-NEXT: lhz r16, 408(r1)
+; FAST-NEXT: stxvd2x v25, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 80
+; FAST-NEXT: std r20, 208(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r20, 440(r1)
+; FAST-NEXT: stxvd2x v26, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 96
+; FAST-NEXT: std r21, 216(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r22, 224(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r22, 456(r1)
+; FAST-NEXT: lhz r21, 448(r1)
+; FAST-NEXT: stxvd2x v27, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 112
+; FAST-NEXT: std r24, 240(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r25, 248(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r26, 256(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r26, r6
+; FAST-NEXT: mr r25, r5
+; FAST-NEXT: mr r24, r4
+; FAST-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 128
+; FAST-NEXT: std r27, 264(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r28, 272(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r29, 280(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r30, 288(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r30, r10
+; FAST-NEXT: mr r29, r9
+; FAST-NEXT: mr r28, r8
+; FAST-NEXT: mr r27, r7
+; FAST-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 144
+; FAST-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 160
+; FAST-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f31, f1
-; FAST-NEXT: lfs f1, 304(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: mr r3, r16
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f30, f1
-; FAST-NEXT: lfs f1, 296(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: mr r3, r17
+; FAST-NEXT: xxmrghd v31, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f29, f1
-; FAST-NEXT: fmr f1, f28
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v30, r3
+; FAST-NEXT: mr r3, r18
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f28, f1
-; FAST-NEXT: fmr f1, f27
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: mr r3, r19
+; FAST-NEXT: xxmrghd v30, vs0, v30
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f27, f1
-; FAST-NEXT: fmr f1, f24
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v29, r3
+; FAST-NEXT: mr r3, r20
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f24, f1
-; FAST-NEXT: fmr f1, f21
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: mr r3, r21
+; FAST-NEXT: xxmrghd v29, vs0, v29
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f21, f1
-; FAST-NEXT: fmr f1, f19
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v28, r3
+; FAST-NEXT: mr r3, r22
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f19, f1
-; FAST-NEXT: fmr f1, f18
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: clrldi r3, r23, 48
+; FAST-NEXT: xxmrghd v28, vs0, v28
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f18, f1
-; FAST-NEXT: fmr f1, f17
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v27, r3
+; FAST-NEXT: clrldi r3, r24, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f17, f1
-; FAST-NEXT: fmr f1, f16
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: clrldi r3, r25, 48
+; FAST-NEXT: xxmrghd v27, vs0, v27
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f16, f1
-; FAST-NEXT: fmr f1, f20
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v26, r3
+; FAST-NEXT: clrldi r3, r26, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f20, f1
-; FAST-NEXT: fmr f1, f22
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: clrldi r3, r27, 48
+; FAST-NEXT: xxmrghd v26, vs0, v26
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f22, f1
-; FAST-NEXT: fmr f1, f23
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v25, r3
+; FAST-NEXT: clrldi r3, r28, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f23, f1
-; FAST-NEXT: fmr f1, f25
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: clrldi r3, r29, 48
+; FAST-NEXT: xxmrghd v25, vs0, v25
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f25, f1
-; FAST-NEXT: fmr f1, f26
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v24, r3
+; FAST-NEXT: clrldi r3, r30, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fctid f0, f25
-; FAST-NEXT: fctid f2, f23
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: fctid f3, f22
-; FAST-NEXT: fctid f4, f20
-; FAST-NEXT: fctid f5, f16
-; FAST-NEXT: fctid f6, f17
-; FAST-NEXT: fctid f7, f18
-; FAST-NEXT: fctid f8, f19
-; FAST-NEXT: fctid f9, f21
-; FAST-NEXT: fctid f10, f24
-; FAST-NEXT: fctid f1, f1
-; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: mffprd r3, f2
-; FAST-NEXT: mtfprd f2, r3
-; FAST-NEXT: mffprd r3, f3
-; FAST-NEXT: mtfprd f3, r3
-; FAST-NEXT: mffprd r3, f4
-; FAST-NEXT: mtfprd f4, r3
-; FAST-NEXT: mffprd r3, f5
-; FAST-NEXT: mtfprd f5, r3
-; FAST-NEXT: mffprd r3, f6
-; FAST-NEXT: mtfprd f6, r3
-; FAST-NEXT: mffprd r3, f7
-; FAST-NEXT: mtfprd f7, r3
-; FAST-NEXT: mffprd r3, f8
-; FAST-NEXT: mtfprd f8, r3
-; FAST-NEXT: mffprd r3, f9
-; FAST-NEXT: mtfprd f9, r3
-; FAST-NEXT: mffprd r3, f10
-; FAST-NEXT: mtfprd f10, r3
-; FAST-NEXT: mffprd r3, f1
-; FAST-NEXT: mtfprd f1, r3
-; FAST-NEXT: xxmrghd v3, vs3, vs2
-; FAST-NEXT: xxmrghd v4, vs5, vs4
-; FAST-NEXT: xxmrghd v5, vs7, vs6
-; FAST-NEXT: xxmrghd v6, vs9, vs8
-; FAST-NEXT: xxmrghd v2, vs0, vs1
-; FAST-NEXT: fctid f0, f27
-; FAST-NEXT: fctid f1, f29
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: xxmrghd v7, vs0, vs10
-; FAST-NEXT: fctid f0, f28
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: mffprd r3, f1
-; FAST-NEXT: mtfprd f1, r3
-; FAST-NEXT: xxmrghd v8, vs1, vs0
-; FAST-NEXT: fctid f0, f30
-; FAST-NEXT: fctid f1, f31
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: vmr v6, v31
+; FAST-NEXT: ld r30, 288(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r29, 280(r1) # 8-byte Folded Reload
+; FAST-NEXT: vmr v7, v30
+; FAST-NEXT: vmr v8, v29
+; FAST-NEXT: ld r28, 272(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r27, 264(r1) # 8-byte Folded Reload
+; FAST-NEXT: vmr v9, v28
+; FAST-NEXT: vmr v2, v27
+; FAST-NEXT: ld r26, 256(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r25, 248(r1) # 8-byte Folded Reload
+; FAST-NEXT: vmr v3, v26
+; FAST-NEXT: vmr v4, v25
+; FAST-NEXT: ld r24, 240(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r23, 232(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r22, 224(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r21, 216(r1) # 8-byte Folded Reload
; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: ld r20, 208(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r19, 200(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r18, 192(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r17, 184(r1) # 8-byte Folded Reload
; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: mffprd r3, f1
-; FAST-NEXT: mtfprd f1, r3
-; FAST-NEXT: xxmrghd v9, vs1, vs0
-; FAST-NEXT: addi r1, r1, 160
+; FAST-NEXT: li r3, 160
+; FAST-NEXT: ld r16, 176(r1) # 8-byte Folded Reload
+; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 144
+; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 128
+; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 112
+; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 96
+; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 80
+; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: xxmrghd v5, vs0, v24
+; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: addi r1, r1, 304
; FAST-NEXT: ld r0, 16(r1)
-; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; FAST-NEXT: mtlr r0
-; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f25, -56(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f24, -64(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f23, -72(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f22, -80(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f21, -88(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f20, -96(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f19, -104(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f18, -112(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f17, -120(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f16, -128(r1) # 8-byte Folded Reload
; FAST-NEXT: blr
%a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x)
ret <16 x i64> %a
@@ -1670,483 +1206,295 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
; BE-LABEL: llrint_v32i64_v32f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
-; BE-NEXT: stdu r1, -864(r1)
-; BE-NEXT: std r0, 880(r1)
-; BE-NEXT: stfd f20, 768(r1) # 8-byte Folded Spill
-; BE-NEXT: fmr f20, f1
-; BE-NEXT: fmr f1, f2
-; BE-NEXT: std r14, 576(r1) # 8-byte Folded Spill
-; BE-NEXT: std r15, 584(r1) # 8-byte Folded Spill
-; BE-NEXT: std r16, 592(r1) # 8-byte Folded Spill
-; BE-NEXT: std r17, 600(r1) # 8-byte Folded Spill
-; BE-NEXT: std r18, 608(r1) # 8-byte Folded Spill
-; BE-NEXT: std r19, 616(r1) # 8-byte Folded Spill
-; BE-NEXT: std r20, 624(r1) # 8-byte Folded Spill
-; BE-NEXT: std r21, 632(r1) # 8-byte Folded Spill
-; BE-NEXT: std r22, 640(r1) # 8-byte Folded Spill
-; BE-NEXT: std r23, 648(r1) # 8-byte Folded Spill
-; BE-NEXT: std r24, 656(r1) # 8-byte Folded Spill
-; BE-NEXT: std r25, 664(r1) # 8-byte Folded Spill
-; BE-NEXT: std r26, 672(r1) # 8-byte Folded Spill
-; BE-NEXT: std r27, 680(r1) # 8-byte Folded Spill
-; BE-NEXT: std r28, 688(r1) # 8-byte Folded Spill
-; BE-NEXT: std r29, 696(r1) # 8-byte Folded Spill
-; BE-NEXT: std r30, 704(r1) # 8-byte Folded Spill
-; BE-NEXT: std r31, 712(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f14, 720(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f15, 728(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f16, 736(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f17, 744(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f18, 752(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f19, 760(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f21, 776(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f22, 784(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f23, 792(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f24, 800(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f25, 808(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f26, 816(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f27, 824(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f28, 832(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f29, 840(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f30, 848(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f31, 856(r1) # 8-byte Folded Spill
-; BE-NEXT: fmr f31, f13
+; BE-NEXT: stdu r1, -624(r1)
+; BE-NEXT: std r0, 640(r1)
+; BE-NEXT: std r30, 608(r1) # 8-byte Folded Spill
; BE-NEXT: mr r30, r3
-; BE-NEXT: fmr f29, f12
-; BE-NEXT: fmr f30, f11
-; BE-NEXT: fmr f28, f10
-; BE-NEXT: fmr f27, f9
-; BE-NEXT: fmr f26, f8
-; BE-NEXT: fmr f25, f7
-; BE-NEXT: fmr f24, f6
-; BE-NEXT: fmr f23, f5
-; BE-NEXT: fmr f22, f4
-; BE-NEXT: fmr f21, f3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f20
-; BE-NEXT: std r3, 304(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f22
-; BE-NEXT: std r3, 296(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f21
-; BE-NEXT: std r3, 280(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f24
-; BE-NEXT: std r3, 264(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f23
-; BE-NEXT: std r3, 248(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f26
-; BE-NEXT: std r3, 232(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f25
+; BE-NEXT: lhz r3, 926(r1)
+; BE-NEXT: std r14, 480(r1) # 8-byte Folded Spill
+; BE-NEXT: lhz r14, 822(r1)
+; BE-NEXT: std r15, 488(r1) # 8-byte Folded Spill
+; BE-NEXT: std r19, 520(r1) # 8-byte Folded Spill
; BE-NEXT: std r3, 216(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f28
+; BE-NEXT: lhz r3, 934(r1)
+; BE-NEXT: lhz r15, 814(r1)
+; BE-NEXT: lhz r19, 742(r1)
+; BE-NEXT: std r22, 544(r1) # 8-byte Folded Spill
+; BE-NEXT: std r23, 552(r1) # 8-byte Folded Spill
+; BE-NEXT: std r25, 568(r1) # 8-byte Folded Spill
+; BE-NEXT: std r26, 576(r1) # 8-byte Folded Spill
+; BE-NEXT: std r3, 208(r1) # 8-byte Folded Spill
+; BE-NEXT: lhz r3, 910(r1)
+; BE-NEXT: lhz r26, 766(r1)
+; BE-NEXT: lhz r25, 774(r1)
+; BE-NEXT: std r27, 584(r1) # 8-byte Folded Spill
+; BE-NEXT: std r28, 592(r1) # 8-byte Folded Spill
+; BE-NEXT: std r29, 600(r1) # 8-byte Folded Spill
+; BE-NEXT: std r31, 616(r1) # 8-byte Folded Spill
; BE-NEXT: std r3, 200(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f27
+; BE-NEXT: lhz r3, 918(r1)
+; BE-NEXT: lhz r31, 798(r1)
+; BE-NEXT: lhz r29, 806(r1)
+; BE-NEXT: lhz r28, 782(r1)
+; BE-NEXT: lhz r27, 790(r1)
+; BE-NEXT: lhz r23, 750(r1)
+; BE-NEXT: lhz r22, 758(r1)
+; BE-NEXT: std r16, 496(r1) # 8-byte Folded Spill
+; BE-NEXT: std r17, 504(r1) # 8-byte Folded Spill
+; BE-NEXT: std r3, 192(r1) # 8-byte Folded Spill
+; BE-NEXT: lhz r3, 894(r1)
+; BE-NEXT: mr r17, r7
+; BE-NEXT: mr r16, r4
+; BE-NEXT: std r18, 512(r1) # 8-byte Folded Spill
+; BE-NEXT: std r20, 528(r1) # 8-byte Folded Spill
+; BE-NEXT: std r21, 536(r1) # 8-byte Folded Spill
+; BE-NEXT: std r24, 560(r1) # 8-byte Folded Spill
; BE-NEXT: std r3, 184(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f29
+; BE-NEXT: lhz r3, 902(r1)
+; BE-NEXT: mr r24, r10
+; BE-NEXT: mr r20, r9
+; BE-NEXT: mr r21, r8
+; BE-NEXT: mr r18, r6
+; BE-NEXT: std r3, 176(r1) # 8-byte Folded Spill
+; BE-NEXT: lhz r3, 878(r1)
; BE-NEXT: std r3, 168(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f30
+; BE-NEXT: lhz r3, 886(r1)
+; BE-NEXT: std r3, 160(r1) # 8-byte Folded Spill
+; BE-NEXT: lhz r3, 862(r1)
; BE-NEXT: std r3, 152(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1028(r1)
+; BE-NEXT: lhz r3, 870(r1)
+; BE-NEXT: std r3, 144(r1) # 8-byte Folded Spill
+; BE-NEXT: lhz r3, 846(r1)
; BE-NEXT: std r3, 136(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
+; BE-NEXT: lhz r3, 854(r1)
+; BE-NEXT: std r3, 128(r1) # 8-byte Folded Spill
+; BE-NEXT: lhz r3, 830(r1)
; BE-NEXT: std r3, 120(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1044(r1)
+; BE-NEXT: lhz r3, 838(r1)
; BE-NEXT: std r3, 112(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1036(r1)
-; BE-NEXT: mr r15, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1060(r1)
-; BE-NEXT: mr r14, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1052(r1)
-; BE-NEXT: mr r31, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1076(r1)
-; BE-NEXT: mr r29, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1068(r1)
-; BE-NEXT: mr r28, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1092(r1)
-; BE-NEXT: mr r27, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1084(r1)
-; BE-NEXT: mr r26, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1108(r1)
-; BE-NEXT: mr r25, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1100(r1)
-; BE-NEXT: mr r24, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1124(r1)
-; BE-NEXT: mr r23, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1116(r1)
-; BE-NEXT: mr r22, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1140(r1)
-; BE-NEXT: mr r21, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1132(r1)
-; BE-NEXT: mr r20, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1156(r1)
-; BE-NEXT: mr r19, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1148(r1)
-; BE-NEXT: mr r18, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1172(r1)
-; BE-NEXT: mr r17, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1164(r1)
-; BE-NEXT: mr r16, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: clrldi r3, r5, 48
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
+; BE-NEXT: bl llrintf
+; BE-NEXT: nop
+; BE-NEXT: std r3, 424(r1)
; BE-NEXT: clrldi r3, r16, 48
-; BE-NEXT: stfs f1, 316(r1) # 4-byte Folded Spill
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
+; BE-NEXT: bl llrintf
+; BE-NEXT: nop
+; BE-NEXT: std r3, 416(r1)
; BE-NEXT: clrldi r3, r17, 48
-; BE-NEXT: stfs f1, 312(r1) # 4-byte Folded Spill
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
+; BE-NEXT: bl llrintf
+; BE-NEXT: nop
+; BE-NEXT: std r3, 440(r1)
; BE-NEXT: clrldi r3, r18, 48
-; BE-NEXT: stfs f1, 292(r1) # 4-byte Folded Spill
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r19, 48
-; BE-NEXT: stfs f1, 276(r1) # 4-byte Folded Spill
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl llrintf
; BE-NEXT: nop
+; BE-NEXT: std r3, 432(r1)
; BE-NEXT: clrldi r3, r20, 48
-; BE-NEXT: stfs f1, 260(r1) # 4-byte Folded Spill
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r21, 48
-; BE-NEXT: stfs f1, 244(r1) # 4-byte Folded Spill
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r22, 48
-; BE-NEXT: stfs f1, 228(r1) # 4-byte Folded Spill
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r23, 48
-; BE-NEXT: stfs f1, 212(r1) # 4-byte Folded Spill
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r24, 48
-; BE-NEXT: stfs f1, 196(r1) # 4-byte Folded Spill
+; BE-NEXT: std r3, 456(r1)
+; BE-NEXT: clrldi r3, r21, 48
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r25, 48
-; BE-NEXT: stfs f1, 180(r1) # 4-byte Folded Spill
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r26, 48
-; BE-NEXT: stfs f1, 164(r1) # 4-byte Folded Spill
+; BE-NEXT: std r3, 448(r1)
+; BE-NEXT: mr r3, r19
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r27, 48
-; BE-NEXT: stfs f1, 148(r1) # 4-byte Folded Spill
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r28, 48
-; BE-NEXT: stfs f1, 132(r1) # 4-byte Folded Spill
+; BE-NEXT: std r3, 472(r1)
+; BE-NEXT: clrldi r3, r24, 48
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r29, 48
-; BE-NEXT: fmr f18, f1
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r31, 48
-; BE-NEXT: fmr f17, f1
+; BE-NEXT: std r3, 464(r1)
+; BE-NEXT: mr r3, r22
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r14, 48
-; BE-NEXT: fmr f16, f1
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r15, 48
-; BE-NEXT: fmr f15, f1
+; BE-NEXT: std r3, 232(r1)
+; BE-NEXT: mr r3, r23
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: ld r3, 112(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f14, f1
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: ld r3, 120(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f31, f1
-; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: std r3, 224(r1)
+; BE-NEXT: mr r3, r25
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: ld r3, 136(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f30, f1
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: ld r3, 152(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f29, f1
-; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: std r3, 248(r1)
+; BE-NEXT: mr r3, r26
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: ld r3, 168(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f28, f1
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: ld r3, 184(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f27, f1
-; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: std r3, 240(r1)
+; BE-NEXT: mr r3, r27
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: ld r3, 200(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f26, f1
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: ld r3, 216(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f25, f1
-; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: std r3, 264(r1)
+; BE-NEXT: mr r3, r28
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: ld r3, 232(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f24, f1
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: ld r3, 248(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f23, f1
-; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: std r3, 256(r1)
+; BE-NEXT: mr r3, r29
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: ld r3, 264(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f22, f1
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: ld r3, 280(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f21, f1
-; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: std r3, 280(r1)
+; BE-NEXT: mr r3, r31
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: ld r3, 296(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f20, f1
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: ld r3, 304(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f19, f1
-; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: std r3, 272(r1)
+; BE-NEXT: mr r3, r14
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f19
-; BE-NEXT: std r3, 328(r1)
-; BE-NEXT: bl llrintf
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f20
-; BE-NEXT: std r3, 320(r1)
-; BE-NEXT: bl llrintf
+; BE-NEXT: std r3, 296(r1)
+; BE-NEXT: mr r3, r15
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f21
-; BE-NEXT: std r3, 344(r1)
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f22
-; BE-NEXT: std r3, 336(r1)
-; BE-NEXT: bl llrintf
+; BE-NEXT: std r3, 288(r1)
+; BE-NEXT: ld r3, 112(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f23
-; BE-NEXT: std r3, 360(r1)
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f24
-; BE-NEXT: std r3, 352(r1)
-; BE-NEXT: bl llrintf
+; BE-NEXT: std r3, 312(r1)
+; BE-NEXT: ld r3, 120(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f25
-; BE-NEXT: std r3, 376(r1)
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f26
-; BE-NEXT: std r3, 368(r1)
-; BE-NEXT: bl llrintf
+; BE-NEXT: std r3, 304(r1)
+; BE-NEXT: ld r3, 128(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f27
-; BE-NEXT: std r3, 392(r1)
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f28
-; BE-NEXT: std r3, 384(r1)
-; BE-NEXT: bl llrintf
+; BE-NEXT: std r3, 328(r1)
+; BE-NEXT: ld r3, 136(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f29
-; BE-NEXT: std r3, 408(r1)
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f30
-; BE-NEXT: std r3, 400(r1)
-; BE-NEXT: bl llrintf
+; BE-NEXT: std r3, 320(r1)
+; BE-NEXT: ld r3, 144(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
-; BE-NEXT: std r3, 424(r1)
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f14
-; BE-NEXT: std r3, 416(r1)
-; BE-NEXT: bl llrintf
+; BE-NEXT: std r3, 344(r1)
+; BE-NEXT: ld r3, 152(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f15
-; BE-NEXT: std r3, 440(r1)
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f16
-; BE-NEXT: std r3, 432(r1)
-; BE-NEXT: bl llrintf
+; BE-NEXT: std r3, 336(r1)
+; BE-NEXT: ld r3, 160(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f17
-; BE-NEXT: std r3, 456(r1)
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f18
-; BE-NEXT: std r3, 448(r1)
-; BE-NEXT: bl llrintf
+; BE-NEXT: std r3, 360(r1)
+; BE-NEXT: ld r3, 168(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 132(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 472(r1)
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 148(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 464(r1)
-; BE-NEXT: bl llrintf
+; BE-NEXT: std r3, 352(r1)
+; BE-NEXT: ld r3, 176(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 164(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 488(r1)
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 180(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 480(r1)
-; BE-NEXT: bl llrintf
+; BE-NEXT: std r3, 376(r1)
+; BE-NEXT: ld r3, 184(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 196(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 504(r1)
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 212(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 496(r1)
-; BE-NEXT: bl llrintf
+; BE-NEXT: std r3, 368(r1)
+; BE-NEXT: ld r3, 192(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 228(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 520(r1)
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 244(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 512(r1)
-; BE-NEXT: bl llrintf
+; BE-NEXT: std r3, 392(r1)
+; BE-NEXT: ld r3, 200(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 260(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 536(r1)
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 276(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 528(r1)
-; BE-NEXT: bl llrintf
+; BE-NEXT: std r3, 384(r1)
+; BE-NEXT: ld r3, 208(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 292(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 552(r1)
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 312(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 544(r1)
-; BE-NEXT: bl llrintf
+; BE-NEXT: std r3, 408(r1)
+; BE-NEXT: ld r3, 216(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 316(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 568(r1)
; BE-NEXT: bl llrintf
; BE-NEXT: nop
-; BE-NEXT: std r3, 560(r1)
-; BE-NEXT: addi r3, r1, 320
+; BE-NEXT: std r3, 400(r1)
+; BE-NEXT: addi r3, r1, 416
; BE-NEXT: lxvd2x vs0, 0, r3
-; BE-NEXT: addi r3, r1, 336
+; BE-NEXT: addi r3, r1, 432
; BE-NEXT: lxvd2x vs1, 0, r3
-; BE-NEXT: addi r3, r1, 352
+; BE-NEXT: addi r3, r1, 448
; BE-NEXT: lxvd2x vs2, 0, r3
-; BE-NEXT: addi r3, r1, 368
+; BE-NEXT: addi r3, r1, 464
; BE-NEXT: lxvd2x vs3, 0, r3
-; BE-NEXT: addi r3, r1, 384
+; BE-NEXT: addi r3, r1, 224
; BE-NEXT: lxvd2x vs4, 0, r3
-; BE-NEXT: addi r3, r1, 400
+; BE-NEXT: addi r3, r1, 240
; BE-NEXT: lxvd2x vs5, 0, r3
-; BE-NEXT: addi r3, r1, 416
+; BE-NEXT: addi r3, r1, 256
; BE-NEXT: lxvd2x vs6, 0, r3
-; BE-NEXT: addi r3, r1, 432
+; BE-NEXT: addi r3, r1, 272
; BE-NEXT: lxvd2x vs7, 0, r3
-; BE-NEXT: addi r3, r1, 448
+; BE-NEXT: addi r3, r1, 288
; BE-NEXT: lxvd2x vs8, 0, r3
-; BE-NEXT: addi r3, r1, 464
+; BE-NEXT: addi r3, r1, 304
; BE-NEXT: lxvd2x vs9, 0, r3
-; BE-NEXT: addi r3, r1, 480
+; BE-NEXT: addi r3, r1, 320
; BE-NEXT: lxvd2x vs10, 0, r3
-; BE-NEXT: addi r3, r1, 496
+; BE-NEXT: addi r3, r1, 336
; BE-NEXT: lxvd2x vs11, 0, r3
-; BE-NEXT: addi r3, r1, 512
+; BE-NEXT: addi r3, r1, 352
; BE-NEXT: lxvd2x vs12, 0, r3
-; BE-NEXT: addi r3, r1, 528
+; BE-NEXT: addi r3, r1, 368
; BE-NEXT: lxvd2x vs13, 0, r3
-; BE-NEXT: addi r3, r1, 544
+; BE-NEXT: addi r3, r1, 384
; BE-NEXT: lxvd2x v2, 0, r3
-; BE-NEXT: addi r3, r1, 560
+; BE-NEXT: addi r3, r1, 400
; BE-NEXT: lxvd2x v3, 0, r3
; BE-NEXT: li r3, 240
; BE-NEXT: stxvd2x v3, r30, r3
@@ -2179,43 +1527,25 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
; BE-NEXT: li r3, 16
; BE-NEXT: stxvd2x vs1, r30, r3
; BE-NEXT: stxvd2x vs0, 0, r30
-; BE-NEXT: lfd f31, 856(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f30, 848(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f29, 840(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f28, 832(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f27, 824(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f26, 816(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f25, 808(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f24, 800(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f23, 792(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f22, 784(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f21, 776(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f20, 768(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f19, 760(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f18, 752(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f17, 744(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f16, 736(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f15, 728(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f14, 720(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r31, 712(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r30, 704(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r29, 696(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r28, 688(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r27, 680(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r26, 672(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r25, 664(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r24, 656(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r23, 648(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r22, 640(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r21, 632(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r20, 624(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r19, 616(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r18, 608(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r17, 600(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r16, 592(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r15, 584(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r14, 576(r1) # 8-byte Folded Reload
-; BE-NEXT: addi r1, r1, 864
+; BE-NEXT: ld r31, 616(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r30, 608(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r29, 600(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r28, 592(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r27, 584(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r26, 576(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r25, 568(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r24, 560(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r23, 552(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r22, 544(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r21, 536(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r20, 528(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r19, 520(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r18, 512(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r17, 504(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r16, 496(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r15, 488(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r14, 480(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 624
; BE-NEXT: ld r0, 16(r1)
; BE-NEXT: mtlr r0
; BE-NEXT: blr
@@ -2223,508 +1553,334 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
; CHECK-LABEL: llrint_v32i64_v32f16:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stdu r1, -688(r1)
-; CHECK-NEXT: li r4, 208
-; CHECK-NEXT: std r0, 704(r1)
-; CHECK-NEXT: std r14, 400(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r15, 408(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r16, 416(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r17, 424(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r18, 432(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r19, 440(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 224
-; CHECK-NEXT: std r20, 448(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r21, 456(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r22, 464(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r23, 472(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r24, 480(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r25, 488(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 240
-; CHECK-NEXT: std r26, 496(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r27, 504(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r28, 512(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r29, 520(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r30, 528(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stdu r1, -576(r1)
+; CHECK-NEXT: std r0, 592(r1)
+; CHECK-NEXT: std r30, 560(r1) # 8-byte Folded Spill
; CHECK-NEXT: mr r30, r3
-; CHECK-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 256
-; CHECK-NEXT: std r31, 536(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f14, 544(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f15, 552(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f16, 560(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f17, 568(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f18, 576(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 272
-; CHECK-NEXT: stfd f19, 584(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f20, 592(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f20, f2
-; CHECK-NEXT: stfd f21, 600(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f21, f3
-; CHECK-NEXT: stfd f22, 608(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f22, f4
-; CHECK-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 288
-; CHECK-NEXT: stfd f23, 616(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f23, f5
-; CHECK-NEXT: stfd f24, 624(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f24, f6
-; CHECK-NEXT: stfd f25, 632(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f25, f7
-; CHECK-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 304
-; CHECK-NEXT: stfd f26, 640(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f26, f8
-; CHECK-NEXT: stfd f27, 648(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f27, f9
-; CHECK-NEXT: stfd f28, 656(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f28, f10
-; CHECK-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 320
-; CHECK-NEXT: stfd f29, 664(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f29, f11
-; CHECK-NEXT: stfd f30, 672(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f30, f12
-; CHECK-NEXT: stfd f31, 680(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f31, f13
-; CHECK-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 336
-; CHECK-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 352
-; CHECK-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 368
-; CHECK-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 384
-; CHECK-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f20
+; CHECK-NEXT: lhz r3, 864(r1)
+; CHECK-NEXT: li r11, 240
+; CHECK-NEXT: std r14, 432(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r19, 472(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r14, 744(r1)
+; CHECK-NEXT: stxvd2x v20, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 256
+; CHECK-NEXT: std r22, 496(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r22, 680(r1)
+; CHECK-NEXT: std r3, 216(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r3, 856(r1)
+; CHECK-NEXT: lhz r19, 672(r1)
+; CHECK-NEXT: stxvd2x v21, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 272
+; CHECK-NEXT: std r23, 504(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r23, 688(r1)
+; CHECK-NEXT: stxvd2x v22, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: std r3, 184(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r3, 848(r1)
+; CHECK-NEXT: li r11, 288
+; CHECK-NEXT: std r25, 520(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r25, 696(r1)
+; CHECK-NEXT: stxvd2x v23, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 304
+; CHECK-NEXT: std r26, 528(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r27, 536(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r28, 544(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r28, 720(r1)
+; CHECK-NEXT: lhz r27, 712(r1)
+; CHECK-NEXT: lhz r26, 704(r1)
+; CHECK-NEXT: stxvd2x v24, r1, r11 # 16-byte Folded Spill
; CHECK-NEXT: std r3, 176(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f21
-; CHECK-NEXT: std r3, 160(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f22
+; CHECK-NEXT: lhz r3, 840(r1)
+; CHECK-NEXT: li r11, 320
+; CHECK-NEXT: std r29, 552(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r29, 728(r1)
+; CHECK-NEXT: stxvd2x v25, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: std r3, 152(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r3, 832(r1)
+; CHECK-NEXT: li r11, 336
+; CHECK-NEXT: std r31, 568(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r31, 736(r1)
+; CHECK-NEXT: stxvd2x v26, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 352
+; CHECK-NEXT: std r15, 440(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r16, 448(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r17, 456(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r17, r6
+; CHECK-NEXT: mr r16, r5
+; CHECK-NEXT: stxvd2x v27, r1, r11 # 16-byte Folded Spill
; CHECK-NEXT: std r3, 144(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f23
-; CHECK-NEXT: std r3, 128(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f24
+; CHECK-NEXT: lhz r3, 824(r1)
+; CHECK-NEXT: li r11, 368
+; CHECK-NEXT: std r18, 464(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r20, 480(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r20, r8
+; CHECK-NEXT: mr r18, r7
+; CHECK-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill
; CHECK-NEXT: std r3, 120(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f25
+; CHECK-NEXT: lhz r3, 816(r1)
+; CHECK-NEXT: li r11, 384
+; CHECK-NEXT: std r21, 488(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r24, 512(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r24, r10
+; CHECK-NEXT: mr r21, r9
+; CHECK-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 400
; CHECK-NEXT: std r3, 112(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f26
+; CHECK-NEXT: lhz r3, 808(r1)
+; CHECK-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 416
; CHECK-NEXT: std r3, 104(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f27
+; CHECK-NEXT: lhz r3, 800(r1)
+; CHECK-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill
; CHECK-NEXT: std r3, 96(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f28
+; CHECK-NEXT: lhz r3, 792(r1)
; CHECK-NEXT: std r3, 88(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f29
+; CHECK-NEXT: lhz r3, 784(r1)
; CHECK-NEXT: std r3, 80(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f30
+; CHECK-NEXT: lhz r3, 776(r1)
; CHECK-NEXT: std r3, 72(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
+; CHECK-NEXT: lhz r3, 768(r1)
; CHECK-NEXT: std r3, 64(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 832(r1)
+; CHECK-NEXT: lhz r3, 760(r1)
; CHECK-NEXT: std r3, 56(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 840(r1)
+; CHECK-NEXT: lhz r3, 752(r1)
; CHECK-NEXT: std r3, 48(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 848(r1)
-; CHECK-NEXT: mr r15, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 856(r1)
-; CHECK-NEXT: mr r14, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 864(r1)
-; CHECK-NEXT: mr r31, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 872(r1)
-; CHECK-NEXT: mr r29, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 880(r1)
-; CHECK-NEXT: mr r28, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 888(r1)
-; CHECK-NEXT: mr r27, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 896(r1)
-; CHECK-NEXT: mr r26, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 904(r1)
-; CHECK-NEXT: mr r25, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 912(r1)
-; CHECK-NEXT: mr r24, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 920(r1)
-; CHECK-NEXT: mr r23, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 928(r1)
-; CHECK-NEXT: mr r22, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 936(r1)
-; CHECK-NEXT: mr r21, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 944(r1)
-; CHECK-NEXT: mr r20, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 952(r1)
-; CHECK-NEXT: mr r19, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 960(r1)
-; CHECK-NEXT: mr r18, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 968(r1)
-; CHECK-NEXT: mr r17, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 976(r1)
-; CHECK-NEXT: mr r16, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: clrldi r3, r4, 48
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: li r3, 204
-; CHECK-NEXT: stxsspx f1, r1, r3 # 4-byte Folded Spill
+; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: nop
+; CHECK-NEXT: mr r15, r3
; CHECK-NEXT: clrldi r3, r16, 48
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: li r3, 200
-; CHECK-NEXT: stxsspx f1, r1, r3 # 4-byte Folded Spill
+; CHECK-NEXT: mtvsrd v31, r15
+; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 224
+; CHECK-NEXT: xxmrghd vs0, vs0, v31
+; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: clrldi r3, r17, 48
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
+; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: nop
+; CHECK-NEXT: mr r17, r3
; CHECK-NEXT: clrldi r3, r18, 48
-; CHECK-NEXT: fmr f29, f1
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r19, 48
-; CHECK-NEXT: fmr f28, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: mtvsrd v31, r17
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 192
+; CHECK-NEXT: xxmrghd vs0, vs0, v31
+; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
; CHECK-NEXT: clrldi r3, r20, 48
-; CHECK-NEXT: fmr f27, f1
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r21, 48
-; CHECK-NEXT: fmr f26, f1
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r22, 48
-; CHECK-NEXT: fmr f25, f1
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r23, 48
-; CHECK-NEXT: fmr f24, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r24, 48
-; CHECK-NEXT: fmr f23, f1
+; CHECK-NEXT: mr r20, r3
+; CHECK-NEXT: clrldi r3, r21, 48
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r25, 48
-; CHECK-NEXT: fmr f22, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: mtvsrd v31, r20
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r26, 48
-; CHECK-NEXT: fmr f21, f1
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 160
+; CHECK-NEXT: xxmrghd vs0, vs0, v31
+; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: mr r3, r19
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r27, 48
-; CHECK-NEXT: fmr f20, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r28, 48
-; CHECK-NEXT: fmr f19, f1
+; CHECK-NEXT: mr r21, r3
+; CHECK-NEXT: clrldi r3, r24, 48
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r29, 48
-; CHECK-NEXT: fmr f18, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: mtvsrd v31, r21
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r31, 48
-; CHECK-NEXT: fmr f17, f1
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 128
+; CHECK-NEXT: xxmrghd vs0, v31, vs0
+; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: mr r3, r22
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r14, 48
-; CHECK-NEXT: fmr f16, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r15, 48
-; CHECK-NEXT: fmr f15, f1
+; CHECK-NEXT: mr r24, r3
+; CHECK-NEXT: mr r3, r23
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 48(r1) # 8-byte Folded Reload
-; CHECK-NEXT: fmr f14, f1
-; CHECK-NEXT: clrldi r3, r3, 48
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: mtvsrd v31, r24
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 56(r1) # 8-byte Folded Reload
-; CHECK-NEXT: fmr f30, f1
-; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: mr r3, r25
+; CHECK-NEXT: xxmrghd v27, vs0, v31
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 64(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v30, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 72(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v29, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: mr r25, r3
+; CHECK-NEXT: mr r3, r26
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 80(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v28, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: mtvsrd v31, r25
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 88(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v27, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: mr r3, r27
+; CHECK-NEXT: xxmrghd v26, vs0, v31
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 96(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v26, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 104(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v25, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: mr r27, r3
+; CHECK-NEXT: mr r3, r28
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 112(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v24, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: mtvsrd v31, r27
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 120(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v23, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: mr r3, r29
+; CHECK-NEXT: xxmrghd v25, vs0, v31
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 128(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v22, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 144(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v21, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: mr r3, r31
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 160(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v20, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: mtvsrd v31, r29
+; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 176(r1) # 8-byte Folded Reload
-; CHECK-NEXT: fmr f31, f1
-; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: mr r3, r14
+; CHECK-NEXT: xxmrghd v24, vs0, v31
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
-; CHECK-NEXT: mtvsrd v31, r3
-; CHECK-NEXT: bl llrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: li r3, 176
-; CHECK-NEXT: xxlor f1, v20, v20
-; CHECK-NEXT: xxmrghd vs0, vs0, v31
-; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: ld r3, 48(r1) # 8-byte Folded Reload
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: xxlor f1, v21, v21
-; CHECK-NEXT: mtvsrd v31, r3
+; CHECK-NEXT: mtvsrd v31, r29
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: li r3, 160
-; CHECK-NEXT: xxlor f1, v22, v22
-; CHECK-NEXT: xxmrghd vs0, vs0, v31
-; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: ld r3, 56(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghd v23, vs0, v31
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: xxlor f1, v23, v23
-; CHECK-NEXT: mtvsrd v31, r3
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: li r3, 144
-; CHECK-NEXT: xxlor f1, v24, v24
-; CHECK-NEXT: xxmrghd vs0, vs0, v31
-; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: ld r3, 64(r1) # 8-byte Folded Reload
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: xxlor f1, v25, v25
-; CHECK-NEXT: mtvsrd v31, r3
+; CHECK-NEXT: mtvsrd v31, r29
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: li r3, 128
-; CHECK-NEXT: xxlor f1, v26, v26
-; CHECK-NEXT: xxmrghd vs0, vs0, v31
-; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: ld r3, 72(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghd v22, vs0, v31
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: xxlor f1, v27, v27
-; CHECK-NEXT: mtvsrd v31, r3
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxlor f1, v28, v28
-; CHECK-NEXT: xxmrghd v27, vs0, v31
-; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: ld r3, 80(r1) # 8-byte Folded Reload
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: xxlor f1, v29, v29
-; CHECK-NEXT: mtvsrd v31, r3
+; CHECK-NEXT: mtvsrd v31, r29
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxlor f1, v30, v30
-; CHECK-NEXT: xxmrghd v29, vs0, v31
-; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: ld r3, 88(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghd v21, vs0, v31
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f30
-; CHECK-NEXT: mtvsrd v31, r3
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f14
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v31, vs0, v31
-; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: ld r3, 96(r1) # 8-byte Folded Reload
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f15
-; CHECK-NEXT: mtvsrd v30, r3
+; CHECK-NEXT: mtvsrd v31, r29
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f16
; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v30, vs0, v30
-; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: ld r3, 104(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghd v20, vs0, v31
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f17
-; CHECK-NEXT: mtvsrd v28, r3
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f18
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v28, vs0, v28
-; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: ld r3, 112(r1) # 8-byte Folded Reload
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f19
-; CHECK-NEXT: mtvsrd v26, r3
+; CHECK-NEXT: mtvsrd v31, r29
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f20
; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v26, vs0, v26
-; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: ld r3, 120(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghd v31, vs0, v31
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f21
-; CHECK-NEXT: mtvsrd v24, r3
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f22
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v24, vs0, v24
-; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: ld r3, 144(r1) # 8-byte Folded Reload
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f23
-; CHECK-NEXT: mtvsrd v22, r3
+; CHECK-NEXT: mtvsrd v30, r29
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f24
; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v22, vs0, v22
-; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: ld r3, 152(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghd v30, vs0, v30
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f25
-; CHECK-NEXT: mtvsrd v20, r3
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f26
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v20, vs0, v20
-; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: ld r3, 176(r1) # 8-byte Folded Reload
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f27
-; CHECK-NEXT: mtvsrd v21, r3
+; CHECK-NEXT: mtvsrd v29, r29
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f28
; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v21, vs0, v21
-; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: ld r3, 184(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghd v29, vs0, v29
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f29
-; CHECK-NEXT: mtvsrd v23, r3
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: li r3, 200
-; CHECK-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload
-; CHECK-NEXT: xxmrghd v23, vs0, v23
-; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: ld r3, 216(r1) # 8-byte Folded Reload
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: mtvsrd v25, r3
-; CHECK-NEXT: li r3, 204
-; CHECK-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload
+; CHECK-NEXT: mtvsrd v28, r29
; CHECK-NEXT: bl llrintf
; CHECK-NEXT: nop
; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: li r3, 240
-; CHECK-NEXT: xxswapd vs1, v23
+; CHECK-NEXT: xxswapd vs1, v29
; CHECK-NEXT: li r4, 128
-; CHECK-NEXT: xxswapd vs2, v21
-; CHECK-NEXT: xxswapd vs3, v31
-; CHECK-NEXT: xxmrghd v2, vs0, v25
+; CHECK-NEXT: xxswapd vs2, v30
+; CHECK-NEXT: xxswapd vs3, v25
+; CHECK-NEXT: xxmrghd v2, vs0, v28
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: stxvd2x vs0, r30, r3
; CHECK-NEXT: li r3, 224
@@ -2732,35 +1888,35 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
; CHECK-NEXT: li r3, 208
; CHECK-NEXT: stxvd2x vs2, r30, r3
; CHECK-NEXT: li r3, 192
-; CHECK-NEXT: xxswapd vs0, v20
+; CHECK-NEXT: xxswapd vs0, v31
; CHECK-NEXT: stxvd2x vs0, r30, r3
; CHECK-NEXT: li r3, 176
-; CHECK-NEXT: xxswapd vs1, v22
+; CHECK-NEXT: xxswapd vs1, v20
; CHECK-NEXT: stxvd2x vs1, r30, r3
; CHECK-NEXT: li r3, 160
-; CHECK-NEXT: xxswapd vs2, v28
-; CHECK-NEXT: xxswapd vs0, v24
+; CHECK-NEXT: xxswapd vs2, v23
+; CHECK-NEXT: xxswapd vs0, v21
; CHECK-NEXT: stxvd2x vs0, r30, r3
; CHECK-NEXT: li r3, 144
-; CHECK-NEXT: xxswapd vs1, v26
+; CHECK-NEXT: xxswapd vs1, v22
; CHECK-NEXT: stxvd2x vs1, r30, r3
; CHECK-NEXT: li r3, 128
; CHECK-NEXT: stxvd2x vs2, r30, r3
; CHECK-NEXT: li r3, 112
-; CHECK-NEXT: xxswapd vs0, v30
+; CHECK-NEXT: xxswapd vs0, v24
; CHECK-NEXT: stxvd2x vs0, r30, r3
; CHECK-NEXT: li r3, 96
; CHECK-NEXT: stxvd2x vs3, r30, r3
; CHECK-NEXT: li r3, 80
; CHECK-NEXT: lxvd2x vs2, r1, r4 # 16-byte Folded Reload
-; CHECK-NEXT: li r4, 144
-; CHECK-NEXT: xxswapd vs1, v29
+; CHECK-NEXT: li r4, 160
+; CHECK-NEXT: xxswapd vs1, v26
; CHECK-NEXT: stxvd2x vs1, r30, r3
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: lxvd2x vs1, r1, r4 # 16-byte Folded Reload
-; CHECK-NEXT: li r4, 160
+; CHECK-NEXT: li r4, 192
; CHECK-NEXT: lxvd2x vs3, r1, r4 # 16-byte Folded Reload
-; CHECK-NEXT: li r4, 176
+; CHECK-NEXT: li r4, 224
; CHECK-NEXT: lxvd2x vs4, r1, r4 # 16-byte Folded Reload
; CHECK-NEXT: xxswapd vs0, v27
; CHECK-NEXT: stxvd2x vs0, r30, r3
@@ -2773,69 +1929,51 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
; CHECK-NEXT: li r3, 16
; CHECK-NEXT: xxswapd vs3, vs3
; CHECK-NEXT: stxvd2x vs3, r30, r3
-; CHECK-NEXT: li r3, 384
+; CHECK-NEXT: li r3, 416
; CHECK-NEXT: xxswapd vs4, vs4
; CHECK-NEXT: stxvd2x vs4, 0, r30
; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 368
-; CHECK-NEXT: lfd f31, 680(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f30, 672(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f29, 664(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f28, 656(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f27, 648(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f26, 640(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f25, 632(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f24, 624(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f23, 616(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f22, 608(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f21, 600(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f20, 592(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f19, 584(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f18, 576(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f17, 568(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f16, 560(r1) # 8-byte Folded Reload
+; CHECK-NEXT: li r3, 400
+; CHECK-NEXT: ld r31, 568(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r30, 560(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r29, 552(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r28, 544(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r27, 536(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r26, 528(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r25, 520(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r24, 512(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r23, 504(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r22, 496(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r21, 488(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r20, 480(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r19, 472(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r18, 464(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r17, 456(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r16, 448(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 352
-; CHECK-NEXT: lfd f15, 552(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f14, 544(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r31, 536(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r30, 528(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r29, 520(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r28, 512(r1) # 8-byte Folded Reload
+; CHECK-NEXT: li r3, 384
+; CHECK-NEXT: ld r15, 440(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r14, 432(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 336
-; CHECK-NEXT: ld r27, 504(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r26, 496(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r25, 488(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r24, 480(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r23, 472(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r22, 464(r1) # 8-byte Folded Reload
+; CHECK-NEXT: li r3, 368
; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 320
-; CHECK-NEXT: ld r21, 456(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r20, 448(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r19, 440(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r18, 432(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r17, 424(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r16, 416(r1) # 8-byte Folded Reload
+; CHECK-NEXT: li r3, 352
; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 304
-; CHECK-NEXT: ld r15, 408(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r14, 400(r1) # 8-byte Folded Reload
+; CHECK-NEXT: li r3, 336
; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 288
+; CHECK-NEXT: li r3, 320
; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 272
+; CHECK-NEXT: li r3, 304
; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 256
+; CHECK-NEXT: li r3, 288
; CHECK-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 240
+; CHECK-NEXT: li r3, 272
; CHECK-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 224
+; CHECK-NEXT: li r3, 256
; CHECK-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 208
+; CHECK-NEXT: li r3, 240
; CHECK-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: addi r1, r1, 688
+; CHECK-NEXT: addi r1, r1, 576
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
@@ -2843,516 +1981,410 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
; FAST-LABEL: llrint_v32i64_v32f16:
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
-; FAST-NEXT: stdu r1, -480(r1)
-; FAST-NEXT: li r4, 128
-; FAST-NEXT: std r0, 496(r1)
-; FAST-NEXT: std r30, 320(r1) # 8-byte Folded Spill
+; FAST-NEXT: stdu r1, -560(r1)
+; FAST-NEXT: std r0, 576(r1)
+; FAST-NEXT: std r30, 544(r1) # 8-byte Folded Spill
; FAST-NEXT: mr r30, r3
-; FAST-NEXT: stfd f14, 336(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f15, 344(r1) # 8-byte Folded Spill
-; FAST-NEXT: fmr f14, f5
-; FAST-NEXT: stfd f16, 352(r1) # 8-byte Folded Spill
-; FAST-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 144
-; FAST-NEXT: fmr f16, f4
-; FAST-NEXT: stfd f17, 360(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f18, 368(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f19, 376(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f20, 384(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f21, 392(r1) # 8-byte Folded Spill
-; FAST-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 160
-; FAST-NEXT: stfd f22, 400(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f23, 408(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f24, 416(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f25, 424(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f26, 432(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f27, 440(r1) # 8-byte Folded Spill
-; FAST-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 176
-; FAST-NEXT: xxlor v22, f3, f3
-; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f29, 456(r1) # 8-byte Folded Spill
-; FAST-NEXT: fmr f29, f9
-; FAST-NEXT: stfd f30, 464(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f31, 472(r1) # 8-byte Folded Spill
-; FAST-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 192
-; FAST-NEXT: xxlor v23, f2, f2
-; FAST-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 208
-; FAST-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 224
-; FAST-NEXT: xxlor v25, f13, f13
-; FAST-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 240
-; FAST-NEXT: xxlor v26, f12, f12
-; FAST-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 256
-; FAST-NEXT: xxlor v27, f11, f11
-; FAST-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 272
-; FAST-NEXT: xxlor v28, f10, f10
-; FAST-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 288
-; FAST-NEXT: xxlor v29, f8, f8
-; FAST-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 304
-; FAST-NEXT: xxlor v30, f7, f7
-; FAST-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 44
-; FAST-NEXT: xxlor v31, f6, f6
-; FAST-NEXT: stxsspx f1, r1, r4 # 4-byte Folded Spill
-; FAST-NEXT: lfs f1, 768(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: lhz r3, 848(r1)
+; FAST-NEXT: li r11, 224
+; FAST-NEXT: std r14, 416(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r15, 424(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r15, 736(r1)
+; FAST-NEXT: stxvd2x v20, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 240
+; FAST-NEXT: std r19, 456(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r14, 728(r1)
+; FAST-NEXT: std r3, 184(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 840(r1)
+; FAST-NEXT: lhz r19, 656(r1)
+; FAST-NEXT: stxvd2x v21, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 256
+; FAST-NEXT: std r21, 472(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r21, 664(r1)
+; FAST-NEXT: stxvd2x v22, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: std r3, 176(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 832(r1)
+; FAST-NEXT: li r11, 272
+; FAST-NEXT: std r23, 488(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r23, 672(r1)
+; FAST-NEXT: stxvd2x v23, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 288
+; FAST-NEXT: std r24, 496(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r26, 512(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r27, 520(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r27, 696(r1)
+; FAST-NEXT: lhz r26, 688(r1)
+; FAST-NEXT: lhz r24, 680(r1)
+; FAST-NEXT: stxvd2x v24, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: std r3, 152(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 824(r1)
+; FAST-NEXT: li r11, 304
+; FAST-NEXT: std r28, 528(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r28, 704(r1)
+; FAST-NEXT: stxvd2x v25, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: std r3, 144(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 816(r1)
+; FAST-NEXT: li r11, 320
+; FAST-NEXT: std r29, 536(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r29, 712(r1)
+; FAST-NEXT: stxvd2x v26, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 336
+; FAST-NEXT: std r31, 552(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r31, 720(r1)
+; FAST-NEXT: std r16, 432(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r17, 440(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r17, r6
+; FAST-NEXT: mr r16, r5
+; FAST-NEXT: stxvd2x v27, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: std r3, 136(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 808(r1)
+; FAST-NEXT: li r11, 352
+; FAST-NEXT: std r18, 448(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r20, 464(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r20, r8
+; FAST-NEXT: mr r18, r7
+; FAST-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: std r3, 104(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 800(r1)
+; FAST-NEXT: li r11, 368
+; FAST-NEXT: std r22, 480(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r25, 504(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r25, r10
+; FAST-NEXT: mr r22, r9
+; FAST-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 384
+; FAST-NEXT: std r3, 96(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 792(r1)
+; FAST-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 400
+; FAST-NEXT: std r3, 88(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 784(r1)
+; FAST-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: std r3, 80(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 776(r1)
+; FAST-NEXT: std r3, 72(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 768(r1)
+; FAST-NEXT: std r3, 64(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 760(r1)
+; FAST-NEXT: std r3, 56(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 752(r1)
+; FAST-NEXT: std r3, 48(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 744(r1)
+; FAST-NEXT: std r3, 40(r1) # 8-byte Folded Spill
+; FAST-NEXT: clrldi r3, r4, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 120
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 760(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: clrldi r3, r16, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 112
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 752(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 208
+; FAST-NEXT: xxmrghd vs0, vs0, v31
+; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: clrldi r3, r17, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 104
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 744(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: clrldi r3, r18, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 96
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 736(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 192
+; FAST-NEXT: xxmrghd vs0, vs0, v31
+; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: clrldi r3, r20, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 88
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 728(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: clrldi r3, r22, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 80
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 720(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 160
+; FAST-NEXT: xxmrghd vs0, vs0, v31
+; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: mr r3, r19
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 72
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 712(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: clrldi r3, r25, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 64
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 704(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 112
+; FAST-NEXT: xxmrghd vs0, v31, vs0
+; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: mr r3, r21
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 56
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 696(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: mr r3, r23
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 48
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 688(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: mr r3, r24
+; FAST-NEXT: xxmrghd v27, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: xxlor v21, f1, f1
-; FAST-NEXT: lfs f1, 680(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: mr r3, r26
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: xxlor v20, f1, f1
-; FAST-NEXT: lfs f1, 672(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: mr r3, r27
+; FAST-NEXT: xxmrghd v26, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: xxlor v24, f1, f1
-; FAST-NEXT: lfs f1, 664(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: mr r3, r28
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f31, f1
-; FAST-NEXT: lfs f1, 656(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: mr r3, r29
+; FAST-NEXT: xxmrghd v25, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f30, f1
-; FAST-NEXT: lfs f1, 648(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: mr r3, r31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f28, f1
-; FAST-NEXT: lfs f1, 640(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: mr r3, r14
+; FAST-NEXT: xxmrghd v24, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f27, f1
-; FAST-NEXT: lfs f1, 632(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: mr r3, r15
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f26, f1
-; FAST-NEXT: lfs f1, 624(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: ld r3, 40(r1) # 8-byte Folded Reload
+; FAST-NEXT: xxmrghd v23, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f25, f1
-; FAST-NEXT: xxlor f1, v25, v25
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: ld r3, 48(r1) # 8-byte Folded Reload
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f24, f1
-; FAST-NEXT: xxlor f1, v26, v26
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: ld r3, 56(r1) # 8-byte Folded Reload
+; FAST-NEXT: xxmrghd v22, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f23, f1
-; FAST-NEXT: xxlor f1, v27, v27
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: ld r3, 64(r1) # 8-byte Folded Reload
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f22, f1
-; FAST-NEXT: xxlor f1, v28, v28
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: ld r3, 72(r1) # 8-byte Folded Reload
+; FAST-NEXT: xxmrghd v21, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f21, f1
-; FAST-NEXT: fmr f1, f29
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: ld r3, 80(r1) # 8-byte Folded Reload
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f20, f1
-; FAST-NEXT: xxlor f1, v29, v29
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: ld r3, 88(r1) # 8-byte Folded Reload
+; FAST-NEXT: xxmrghd v20, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f19, f1
-; FAST-NEXT: xxlor f1, v30, v30
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: ld r3, 96(r1) # 8-byte Folded Reload
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f18, f1
-; FAST-NEXT: xxlor f1, v31, v31
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: ld r3, 104(r1) # 8-byte Folded Reload
+; FAST-NEXT: xxmrghd v31, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f29, f1
-; FAST-NEXT: fmr f1, f14
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v30, r3
+; FAST-NEXT: ld r3, 136(r1) # 8-byte Folded Reload
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f14, f1
-; FAST-NEXT: fmr f1, f16
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: ld r3, 144(r1) # 8-byte Folded Reload
+; FAST-NEXT: xxmrghd v30, vs0, v30
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f16, f1
-; FAST-NEXT: xxlor f1, v22, v22
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v29, r3
+; FAST-NEXT: ld r3, 152(r1) # 8-byte Folded Reload
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f17, f1
-; FAST-NEXT: xxlor f1, v23, v23
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: ld r3, 176(r1) # 8-byte Folded Reload
+; FAST-NEXT: xxmrghd v29, vs0, v29
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 44
-; FAST-NEXT: fmr f15, f1
-; FAST-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v28, r3
+; FAST-NEXT: ld r3, 184(r1) # 8-byte Folded Reload
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fctid f3, f15
-; FAST-NEXT: fctid f4, f17
-; FAST-NEXT: mffprd r3, f3
-; FAST-NEXT: fctid f5, f16
-; FAST-NEXT: fctid f6, f14
-; FAST-NEXT: fctid f7, f18
-; FAST-NEXT: fctid f8, f19
-; FAST-NEXT: fctid f13, f1
-; FAST-NEXT: fctid f9, f20
-; FAST-NEXT: fctid f10, f22
-; FAST-NEXT: fctid f11, f24
-; FAST-NEXT: fctid f12, f25
-; FAST-NEXT: fctid f2, f23
-; FAST-NEXT: fctid f0, f21
-; FAST-NEXT: mtvsrd v2, r3
-; FAST-NEXT: mffprd r3, f4
-; FAST-NEXT: mtvsrd v3, r3
-; FAST-NEXT: mffprd r3, f5
-; FAST-NEXT: mtfprd f5, r3
-; FAST-NEXT: mffprd r3, f6
-; FAST-NEXT: mtfprd f1, r3
-; FAST-NEXT: mffprd r3, f7
-; FAST-NEXT: mtfprd f6, r3
-; FAST-NEXT: mffprd r3, f8
-; FAST-NEXT: mtfprd f7, r3
-; FAST-NEXT: mffprd r3, f9
-; FAST-NEXT: mtfprd f3, r3
-; FAST-NEXT: mffprd r3, f10
-; FAST-NEXT: mtfprd f4, r3
-; FAST-NEXT: mffprd r3, f11
-; FAST-NEXT: fctid f11, f31
-; FAST-NEXT: lfd f31, 56(r1) # 8-byte Folded Reload
-; FAST-NEXT: mtfprd f8, r3
-; FAST-NEXT: mffprd r3, f12
-; FAST-NEXT: xxlor f12, v24, v24
-; FAST-NEXT: fctid f31, f31
-; FAST-NEXT: fctid f12, f12
-; FAST-NEXT: mtfprd f9, r3
-; FAST-NEXT: mffprd r3, f13
-; FAST-NEXT: lfd f13, 48(r1) # 8-byte Folded Reload
-; FAST-NEXT: mtfprd f10, r3
-; FAST-NEXT: fctid f13, f13
-; FAST-NEXT: xxmrghd v3, vs5, v3
-; FAST-NEXT: fctid f5, f26
-; FAST-NEXT: mffprd r3, f5
-; FAST-NEXT: mtfprd f5, r3
-; FAST-NEXT: xxmrghd v4, vs7, vs6
-; FAST-NEXT: fctid f6, f27
-; FAST-NEXT: fctid f7, f28
-; FAST-NEXT: mffprd r3, f6
-; FAST-NEXT: lfd f28, 96(r1) # 8-byte Folded Reload
-; FAST-NEXT: fctid f28, f28
-; FAST-NEXT: mtfprd f6, r3
-; FAST-NEXT: mffprd r3, f7
-; FAST-NEXT: mtfprd f7, r3
-; FAST-NEXT: xxmrghd v2, v2, vs10
-; FAST-NEXT: fctid f10, f30
-; FAST-NEXT: mffprd r3, f10
-; FAST-NEXT: lfd f30, 80(r1) # 8-byte Folded Reload
-; FAST-NEXT: fctid f30, f30
-; FAST-NEXT: mtfprd f10, r3
-; FAST-NEXT: mffprd r3, f11
-; FAST-NEXT: mtfprd f11, r3
-; FAST-NEXT: mffprd r3, f12
-; FAST-NEXT: mtfprd f12, r3
-; FAST-NEXT: xxmrghd v5, vs12, vs11
-; FAST-NEXT: xxlor f11, v20, v20
-; FAST-NEXT: xxlor f12, v21, v21
-; FAST-NEXT: fctid f11, f11
-; FAST-NEXT: fctid f12, f12
-; FAST-NEXT: mffprd r3, f11
-; FAST-NEXT: mtfprd f11, r3
-; FAST-NEXT: mffprd r3, f12
-; FAST-NEXT: mtfprd f12, r3
-; FAST-NEXT: mffprd r3, f13
-; FAST-NEXT: mtfprd f13, r3
-; FAST-NEXT: mffprd r3, f31
-; FAST-NEXT: lfd f31, 64(r1) # 8-byte Folded Reload
-; FAST-NEXT: fctid f31, f31
-; FAST-NEXT: mtvsrd v0, r3
-; FAST-NEXT: mffprd r3, f31
-; FAST-NEXT: lfd f31, 72(r1) # 8-byte Folded Reload
-; FAST-NEXT: mtvsrd v1, r3
-; FAST-NEXT: mffprd r3, f30
-; FAST-NEXT: lfd f30, 88(r1) # 8-byte Folded Reload
-; FAST-NEXT: fctid f31, f31
-; FAST-NEXT: mtvsrd v6, r3
-; FAST-NEXT: mffprd r3, f28
-; FAST-NEXT: lfd f28, 104(r1) # 8-byte Folded Reload
-; FAST-NEXT: fctid f30, f30
-; FAST-NEXT: fctid f28, f28
-; FAST-NEXT: mtvsrd v7, r3
-; FAST-NEXT: mffprd r3, f28
-; FAST-NEXT: lfd f28, 112(r1) # 8-byte Folded Reload
-; FAST-NEXT: fctid f28, f28
-; FAST-NEXT: mtvsrd v8, r3
-; FAST-NEXT: mffprd r3, f28
-; FAST-NEXT: lfd f28, 120(r1) # 8-byte Folded Reload
-; FAST-NEXT: fctid f28, f28
-; FAST-NEXT: xxmrghd v10, vs12, vs11
-; FAST-NEXT: xxmrghd v0, v0, vs13
-; FAST-NEXT: xxswapd vs12, v0
-; FAST-NEXT: xxmrghd v0, vs9, vs8
-; FAST-NEXT: xxmrghd v7, v8, v7
-; FAST-NEXT: mtvsrd v8, r3
-; FAST-NEXT: mffprd r3, f28
-; FAST-NEXT: mtvsrd v9, r3
-; FAST-NEXT: mffprd r3, f30
-; FAST-NEXT: xxswapd v7, v7
-; FAST-NEXT: xxmrghd v8, v9, v8
-; FAST-NEXT: mtvsrd v9, r3
-; FAST-NEXT: mffprd r3, f31
-; FAST-NEXT: xxswapd v8, v8
-; FAST-NEXT: xxmrghd v6, v9, v6
-; FAST-NEXT: mtvsrd v9, r3
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: xxswapd vs1, v29
+; FAST-NEXT: li r4, 112
+; FAST-NEXT: xxswapd vs2, v30
+; FAST-NEXT: xxswapd vs3, v25
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
; FAST-NEXT: li r3, 240
-; FAST-NEXT: stxvd2x v8, r30, r3
+; FAST-NEXT: xxmrghd v2, vs0, v28
+; FAST-NEXT: xxswapd vs0, v2
+; FAST-NEXT: stxvd2x vs0, r30, r3
; FAST-NEXT: li r3, 224
-; FAST-NEXT: stxvd2x v7, r30, r3
+; FAST-NEXT: stxvd2x vs1, r30, r3
; FAST-NEXT: li r3, 208
-; FAST-NEXT: xxswapd vs11, v6
-; FAST-NEXT: xxmrghd v6, vs10, vs7
-; FAST-NEXT: stxvd2x vs11, r30, r3
+; FAST-NEXT: stxvd2x vs2, r30, r3
; FAST-NEXT: li r3, 192
-; FAST-NEXT: xxmrghd v1, v9, v1
-; FAST-NEXT: xxswapd vs11, v1
-; FAST-NEXT: xxmrghd v1, vs6, vs5
-; FAST-NEXT: xxswapd vs5, v10
-; FAST-NEXT: xxswapd vs6, v5
-; FAST-NEXT: stxvd2x vs11, r30, r3
+; FAST-NEXT: xxswapd vs0, v31
+; FAST-NEXT: stxvd2x vs0, r30, r3
; FAST-NEXT: li r3, 176
-; FAST-NEXT: stxvd2x vs12, r30, r3
+; FAST-NEXT: xxswapd vs1, v20
+; FAST-NEXT: stxvd2x vs1, r30, r3
; FAST-NEXT: li r3, 160
-; FAST-NEXT: stxvd2x vs5, r30, r3
+; FAST-NEXT: xxswapd vs2, v23
+; FAST-NEXT: xxswapd vs0, v21
+; FAST-NEXT: stxvd2x vs0, r30, r3
; FAST-NEXT: li r3, 144
-; FAST-NEXT: stxvd2x vs6, r30, r3
-; FAST-NEXT: mffprd r3, f2
-; FAST-NEXT: mtfprd f7, r3
+; FAST-NEXT: xxswapd vs1, v22
+; FAST-NEXT: stxvd2x vs1, r30, r3
; FAST-NEXT: li r3, 128
-; FAST-NEXT: xxswapd vs5, v6
-; FAST-NEXT: stxvd2x vs5, r30, r3
-; FAST-NEXT: li r3, 112
-; FAST-NEXT: xxswapd vs2, v1
-; FAST-NEXT: xxswapd vs6, v0
; FAST-NEXT: stxvd2x vs2, r30, r3
+; FAST-NEXT: li r3, 112
+; FAST-NEXT: xxswapd vs0, v24
+; FAST-NEXT: stxvd2x vs0, r30, r3
; FAST-NEXT: li r3, 96
-; FAST-NEXT: fctid f2, f29
-; FAST-NEXT: stxvd2x vs6, r30, r3
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: mffprd r3, f2
-; FAST-NEXT: mtfprd f2, r3
+; FAST-NEXT: stxvd2x vs3, r30, r3
; FAST-NEXT: li r3, 80
-; FAST-NEXT: xxmrghd v5, vs7, vs4
-; FAST-NEXT: xxswapd vs4, v2
-; FAST-NEXT: xxmrghd v0, vs0, vs3
-; FAST-NEXT: xxswapd vs0, v5
-; FAST-NEXT: xxswapd vs3, v3
-; FAST-NEXT: stxvd2x vs0, r30, r3
+; FAST-NEXT: lxvd2x vs2, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT: li r4, 160
+; FAST-NEXT: xxswapd vs1, v26
+; FAST-NEXT: stxvd2x vs1, r30, r3
; FAST-NEXT: li r3, 64
-; FAST-NEXT: xxswapd vs0, v0
+; FAST-NEXT: lxvd2x vs1, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT: li r4, 192
+; FAST-NEXT: lxvd2x vs3, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT: li r4, 208
+; FAST-NEXT: lxvd2x vs4, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT: xxswapd vs0, v27
; FAST-NEXT: stxvd2x vs0, r30, r3
; FAST-NEXT: li r3, 48
-; FAST-NEXT: xxmrghd v5, vs2, vs1
-; FAST-NEXT: xxswapd vs1, v4
-; FAST-NEXT: stxvd2x vs1, r30, r3
-; FAST-NEXT: li r3, 32
-; FAST-NEXT: xxswapd vs2, v5
+; FAST-NEXT: xxswapd vs2, vs2
; FAST-NEXT: stxvd2x vs2, r30, r3
+; FAST-NEXT: li r3, 32
+; FAST-NEXT: xxswapd vs1, vs1
+; FAST-NEXT: stxvd2x vs1, r30, r3
; FAST-NEXT: li r3, 16
+; FAST-NEXT: xxswapd vs3, vs3
; FAST-NEXT: stxvd2x vs3, r30, r3
-; FAST-NEXT: li r3, 304
+; FAST-NEXT: li r3, 400
+; FAST-NEXT: xxswapd vs4, vs4
; FAST-NEXT: stxvd2x vs4, 0, r30
-; FAST-NEXT: lfd f31, 472(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f30, 464(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f29, 456(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f28, 448(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f27, 440(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f26, 432(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f25, 424(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f24, 416(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f23, 408(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f22, 400(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f21, 392(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f20, 384(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f19, 376(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f18, 368(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f17, 360(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f16, 352(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f15, 344(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f14, 336(r1) # 8-byte Folded Reload
; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 288
-; FAST-NEXT: ld r30, 320(r1) # 8-byte Folded Reload
+; FAST-NEXT: li r3, 384
+; FAST-NEXT: ld r31, 552(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r30, 544(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r29, 536(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r28, 528(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r27, 520(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r26, 512(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r25, 504(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r24, 496(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r23, 488(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r22, 480(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r21, 472(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r20, 464(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r19, 456(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r18, 448(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r17, 440(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r16, 432(r1) # 8-byte Folded Reload
; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 272
+; FAST-NEXT: li r3, 368
+; FAST-NEXT: ld r15, 424(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r14, 416(r1) # 8-byte Folded Reload
; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 256
+; FAST-NEXT: li r3, 352
; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 240
+; FAST-NEXT: li r3, 336
; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 224
+; FAST-NEXT: li r3, 320
; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 208
+; FAST-NEXT: li r3, 304
; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 192
+; FAST-NEXT: li r3, 288
; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 176
+; FAST-NEXT: li r3, 272
; FAST-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 160
+; FAST-NEXT: li r3, 256
; FAST-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 144
+; FAST-NEXT: li r3, 240
; FAST-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 128
+; FAST-NEXT: li r3, 224
; FAST-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: addi r1, r1, 480
+; FAST-NEXT: addi r1, r1, 560
; FAST-NEXT: ld r0, 16(r1)
; FAST-NEXT: mtlr r0
; FAST-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/vector-lrint.ll b/llvm/test/CodeGen/PowerPC/vector-lrint.ll
index 6c824be017e81..90bd869ef0265 100644
--- a/llvm/test/CodeGen/PowerPC/vector-lrint.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-lrint.ll
@@ -33,10 +33,8 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) nounwind {
; BE: # %bb.0:
; BE-NEXT: mflr r0
; BE-NEXT: stdu r1, -112(r1)
-; BE-NEXT: std r0, 128(r1)
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: std r0, 128(r1)
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
; BE-NEXT: bl lrintf
@@ -50,10 +48,8 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -32(r1)
-; CHECK-NEXT: std r0, 48(r1)
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: std r0, 48(r1)
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
; CHECK-NEXT: bl lrintf
@@ -67,10 +63,8 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) nounwind {
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
; FAST-NEXT: stdu r1, -32(r1)
-; FAST-NEXT: std r0, 48(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: std r0, 48(r1)
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
; FAST-NEXT: fctid f0, f1
@@ -88,37 +82,26 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) nounwind {
; BE-LABEL: lrint_v2f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
-; BE-NEXT: stdu r1, -160(r1)
-; BE-NEXT: std r0, 176(r1)
-; BE-NEXT: stfd f31, 152(r1) # 8-byte Folded Spill
-; BE-NEXT: fmr f31, f1
-; BE-NEXT: fmr f1, f2
-; BE-NEXT: std r30, 136(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
+; BE-NEXT: stdu r1, -144(r1)
+; BE-NEXT: std r0, 160(r1)
+; BE-NEXT: std r30, 128(r1) # 8-byte Folded Spill
; BE-NEXT: mr r30, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r30, 48
-; BE-NEXT: fmr f31, f1
+; BE-NEXT: clrldi r3, r4, 48
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
; BE-NEXT: std r3, 120(r1)
+; BE-NEXT: clrldi r3, r30, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
; BE-NEXT: std r3, 112(r1)
; BE-NEXT: addi r3, r1, 112
-; BE-NEXT: ld r30, 136(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f31, 152(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r30, 128(r1) # 8-byte Folded Reload
; BE-NEXT: lxvd2x v2, 0, r3
-; BE-NEXT: addi r1, r1, 160
+; BE-NEXT: addi r1, r1, 144
; BE-NEXT: ld r0, 16(r1)
; BE-NEXT: mtlr r0
; BE-NEXT: blr
@@ -127,35 +110,28 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -96(r1)
-; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: std r0, 112(r1)
-; CHECK-NEXT: std r30, 72(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f31, 88(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f31, f2
-; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
-; CHECK-NEXT: mr r30, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
+; CHECK-NEXT: li r5, 48
; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: std r0, 112(r1)
+; CHECK-NEXT: std r29, 72(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r30, 80(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r30, r4
+; CHECK-NEXT: stxvd2x v31, r1, r5 # 16-byte Folded Spill
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
+; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: nop
+; CHECK-NEXT: mr r29, r3
; CHECK-NEXT: clrldi r3, r30, 48
-; CHECK-NEXT: fmr f31, f1
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: bl lrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
-; CHECK-NEXT: mtvsrd v31, r3
+; CHECK-NEXT: mtvsrd v31, r29
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: lfd f31, 88(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r30, 72(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r30, 80(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r29, 72(r1) # 8-byte Folded Reload
; CHECK-NEXT: xxmrghd v2, vs0, v31
; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 96
@@ -166,35 +142,30 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) nounwind {
; FAST-LABEL: lrint_v2f16:
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
-; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
-; FAST-NEXT: stdu r1, -48(r1)
-; FAST-NEXT: fmr f31, f1
-; FAST-NEXT: fmr f1, f2
-; FAST-NEXT: std r0, 64(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
+; FAST-NEXT: stdu r1, -80(r1)
+; FAST-NEXT: li r5, 48
; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: std r0, 96(r1)
+; FAST-NEXT: std r30, 64(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r30, r4
+; FAST-NEXT: stxvd2x v31, r1, r5 # 16-byte Folded Spill
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f30, f1
-; FAST-NEXT: fmr f1, f31
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: clrldi r3, r30, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
; FAST-NEXT: fctid f0, f1
-; FAST-NEXT: fctid f1, f30
+; FAST-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
; FAST-NEXT: mffprd r3, f0
; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: mffprd r3, f1
-; FAST-NEXT: mtfprd f1, r3
-; FAST-NEXT: xxmrghd v2, vs1, vs0
-; FAST-NEXT: addi r1, r1, 48
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: xxmrghd v2, vs0, v31
+; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: addi r1, r1, 80
; FAST-NEXT: ld r0, 16(r1)
-; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; FAST-NEXT: mtlr r0
; FAST-NEXT: blr
%a = call <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half> %x)
@@ -206,73 +177,46 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) nounwind {
; BE-LABEL: lrint_v4f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
-; BE-NEXT: stdu r1, -208(r1)
-; BE-NEXT: std r0, 224(r1)
-; BE-NEXT: stfd f29, 184(r1) # 8-byte Folded Spill
-; BE-NEXT: fmr f29, f1
-; BE-NEXT: fmr f1, f2
-; BE-NEXT: std r28, 152(r1) # 8-byte Folded Spill
-; BE-NEXT: std r29, 160(r1) # 8-byte Folded Spill
-; BE-NEXT: std r30, 168(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f30, 192(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f31, 200(r1) # 8-byte Folded Spill
-; BE-NEXT: fmr f31, f4
-; BE-NEXT: fmr f30, f3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f29
-; BE-NEXT: mr r30, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
-; BE-NEXT: mr r29, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f30
+; BE-NEXT: stdu r1, -176(r1)
+; BE-NEXT: std r0, 192(r1)
+; BE-NEXT: std r28, 144(r1) # 8-byte Folded Spill
; BE-NEXT: mr r28, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: clrldi r3, r4, 48
+; BE-NEXT: std r29, 152(r1) # 8-byte Folded Spill
+; BE-NEXT: std r30, 160(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r6
+; BE-NEXT: mr r29, r5
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
+; BE-NEXT: bl lrintf
+; BE-NEXT: nop
+; BE-NEXT: std r3, 120(r1)
; BE-NEXT: clrldi r3, r28, 48
-; BE-NEXT: fmr f31, f1
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r29, 48
-; BE-NEXT: fmr f30, f1
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
+; BE-NEXT: std r3, 112(r1)
; BE-NEXT: clrldi r3, r30, 48
-; BE-NEXT: fmr f29, f1
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f29
-; BE-NEXT: std r3, 120(r1)
-; BE-NEXT: bl lrintf
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f30
-; BE-NEXT: std r3, 112(r1)
-; BE-NEXT: bl lrintf
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
; BE-NEXT: std r3, 136(r1)
+; BE-NEXT: clrldi r3, r29, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
; BE-NEXT: std r3, 128(r1)
; BE-NEXT: addi r3, r1, 112
-; BE-NEXT: ld r30, 168(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f31, 200(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f30, 192(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f29, 184(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r29, 152(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r28, 144(r1) # 8-byte Folded Reload
; BE-NEXT: lxvd2x v2, 0, r3
; BE-NEXT: addi r3, r1, 128
-; BE-NEXT: ld r29, 160(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r28, 152(r1) # 8-byte Folded Reload
; BE-NEXT: lxvd2x v3, 0, r3
-; BE-NEXT: addi r1, r1, 208
+; BE-NEXT: addi r1, r1, 176
; BE-NEXT: ld r0, 16(r1)
; BE-NEXT: mtlr r0
; BE-NEXT: blr
@@ -280,79 +224,57 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) nounwind {
; CHECK-LABEL: lrint_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stdu r1, -144(r1)
-; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: std r0, 160(r1)
-; CHECK-NEXT: std r28, 88(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r29, 96(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r30, 104(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f29, 120(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f29, f2
-; CHECK-NEXT: stfd f30, 128(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f30, f3
-; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f31, f4
-; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f29
-; CHECK-NEXT: mr r30, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f30
-; CHECK-NEXT: mr r29, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
-; CHECK-NEXT: mr r28, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
+; CHECK-NEXT: stdu r1, -128(r1)
+; CHECK-NEXT: li r7, 48
+; CHECK-NEXT: std r0, 144(r1)
; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: std r27, 88(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r28, 96(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r29, 104(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r29, r5
+; CHECK-NEXT: mr r28, r4
+; CHECK-NEXT: stxvd2x v30, r1, r7 # 16-byte Folded Spill
+; CHECK-NEXT: li r7, 64
+; CHECK-NEXT: std r30, 112(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r30, r6
+; CHECK-NEXT: stxvd2x v31, r1, r7 # 16-byte Folded Spill
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r28, 48
-; CHECK-NEXT: fmr f31, f1
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r29, 48
-; CHECK-NEXT: fmr f30, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r30, 48
-; CHECK-NEXT: fmr f29, f1
+; CHECK-NEXT: mr r27, r3
+; CHECK-NEXT: clrldi r3, r28, 48
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v31, r27
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f29
-; CHECK-NEXT: mtvsrd v31, r3
-; CHECK-NEXT: bl lrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f30
; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: clrldi r3, r29, 48
; CHECK-NEXT: xxmrghd v31, vs0, v31
+; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: nop
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
-; CHECK-NEXT: mtvsrd v30, r3
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: clrldi r3, r30, 48
+; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v30, r29
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: vmr v2, v31
-; CHECK-NEXT: lfd f31, 136(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f30, 128(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f29, 120(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r30, 104(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r29, 96(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r29, 104(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r28, 96(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r27, 88(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: ld r28, 88(r1) # 8-byte Folded Reload
; CHECK-NEXT: xxmrghd v3, vs0, v30
; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: addi r1, r1, 144
+; CHECK-NEXT: addi r1, r1, 128
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
@@ -360,63 +282,55 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) nounwind {
; FAST-LABEL: lrint_v4f16:
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
-; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
-; FAST-NEXT: stdu r1, -64(r1)
-; FAST-NEXT: fmr f29, f1
-; FAST-NEXT: fmr f1, f4
-; FAST-NEXT: std r0, 80(r1)
-; FAST-NEXT: fmr f31, f3
-; FAST-NEXT: fmr f30, f2
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
+; FAST-NEXT: stdu r1, -112(r1)
+; FAST-NEXT: li r7, 48
+; FAST-NEXT: std r0, 128(r1)
; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: std r28, 80(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r29, 88(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r30, 96(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r30, r6
+; FAST-NEXT: mr r29, r5
+; FAST-NEXT: stxvd2x v30, r1, r7 # 16-byte Folded Spill
+; FAST-NEXT: li r7, 64
+; FAST-NEXT: mr r28, r4
+; FAST-NEXT: stxvd2x v31, r1, r7 # 16-byte Folded Spill
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f28, f1
-; FAST-NEXT: fmr f1, f31
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: clrldi r3, r28, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f31, f1
-; FAST-NEXT: fmr f1, f30
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: clrldi r3, r29, 48
+; FAST-NEXT: xxmrghd v31, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f30, f1
-; FAST-NEXT: fmr f1, f29
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v30, r3
+; FAST-NEXT: clrldi r3, r30, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fctid f0, f30
-; FAST-NEXT: fctid f2, f31
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: fctid f1, f1
-; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: mffprd r3, f2
-; FAST-NEXT: mtfprd f2, r3
-; FAST-NEXT: mffprd r3, f1
-; FAST-NEXT: mtfprd f1, r3
-; FAST-NEXT: xxmrghd v2, vs0, vs1
-; FAST-NEXT: fctid f0, f28
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: vmr v2, v31
+; FAST-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r29, 88(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r28, 80(r1) # 8-byte Folded Reload
; FAST-NEXT: mffprd r3, f0
; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: xxmrghd v3, vs0, vs2
-; FAST-NEXT: addi r1, r1, 64
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: xxmrghd v3, vs0, v30
+; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: addi r1, r1, 112
; FAST-NEXT: ld r0, 16(r1)
-; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; FAST-NEXT: mtlr r0
-; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload
; FAST-NEXT: blr
%a = call <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half> %x)
ret <4 x i64> %a
@@ -427,145 +341,86 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) nounwind {
; BE-LABEL: lrint_v8f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
-; BE-NEXT: stdu r1, -304(r1)
-; BE-NEXT: std r0, 320(r1)
-; BE-NEXT: stfd f25, 248(r1) # 8-byte Folded Spill
-; BE-NEXT: fmr f25, f1
-; BE-NEXT: fmr f1, f2
-; BE-NEXT: std r24, 184(r1) # 8-byte Folded Spill
-; BE-NEXT: std r25, 192(r1) # 8-byte Folded Spill
-; BE-NEXT: std r26, 200(r1) # 8-byte Folded Spill
-; BE-NEXT: std r27, 208(r1) # 8-byte Folded Spill
-; BE-NEXT: std r28, 216(r1) # 8-byte Folded Spill
-; BE-NEXT: std r29, 224(r1) # 8-byte Folded Spill
-; BE-NEXT: std r30, 232(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f26, 256(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f27, 264(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f28, 272(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f29, 280(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f30, 288(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f31, 296(r1) # 8-byte Folded Spill
-; BE-NEXT: fmr f31, f8
-; BE-NEXT: fmr f30, f7
-; BE-NEXT: fmr f29, f6
-; BE-NEXT: fmr f28, f5
-; BE-NEXT: fmr f27, f4
-; BE-NEXT: fmr f26, f3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f25
-; BE-NEXT: mr r30, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f27
-; BE-NEXT: mr r29, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f26
-; BE-NEXT: mr r28, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f29
-; BE-NEXT: mr r27, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f28
-; BE-NEXT: mr r26, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
-; BE-NEXT: mr r25, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f30
+; BE-NEXT: stdu r1, -240(r1)
+; BE-NEXT: std r0, 256(r1)
+; BE-NEXT: std r24, 176(r1) # 8-byte Folded Spill
; BE-NEXT: mr r24, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: clrldi r3, r4, 48
+; BE-NEXT: std r25, 184(r1) # 8-byte Folded Spill
+; BE-NEXT: std r26, 192(r1) # 8-byte Folded Spill
+; BE-NEXT: std r27, 200(r1) # 8-byte Folded Spill
+; BE-NEXT: std r28, 208(r1) # 8-byte Folded Spill
+; BE-NEXT: std r29, 216(r1) # 8-byte Folded Spill
+; BE-NEXT: std r30, 224(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r29, r10
+; BE-NEXT: mr r30, r9
+; BE-NEXT: mr r27, r8
+; BE-NEXT: mr r28, r7
+; BE-NEXT: mr r26, r6
+; BE-NEXT: mr r25, r5
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
+; BE-NEXT: bl lrintf
+; BE-NEXT: nop
+; BE-NEXT: std r3, 120(r1)
; BE-NEXT: clrldi r3, r24, 48
-; BE-NEXT: fmr f31, f1
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r25, 48
-; BE-NEXT: fmr f30, f1
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
+; BE-NEXT: std r3, 112(r1)
; BE-NEXT: clrldi r3, r26, 48
-; BE-NEXT: fmr f29, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r27, 48
-; BE-NEXT: fmr f28, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r28, 48
-; BE-NEXT: fmr f27, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r29, 48
-; BE-NEXT: fmr f26, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r30, 48
-; BE-NEXT: fmr f25, f1
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f25
-; BE-NEXT: std r3, 120(r1)
-; BE-NEXT: bl lrintf
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f26
-; BE-NEXT: std r3, 112(r1)
-; BE-NEXT: bl lrintf
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f27
; BE-NEXT: std r3, 136(r1)
+; BE-NEXT: clrldi r3, r25, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f28
; BE-NEXT: std r3, 128(r1)
+; BE-NEXT: clrldi r3, r27, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f29
; BE-NEXT: std r3, 152(r1)
+; BE-NEXT: clrldi r3, r28, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f30
; BE-NEXT: std r3, 144(r1)
+; BE-NEXT: clrldi r3, r29, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
; BE-NEXT: std r3, 168(r1)
+; BE-NEXT: clrldi r3, r30, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
; BE-NEXT: std r3, 160(r1)
; BE-NEXT: addi r3, r1, 112
-; BE-NEXT: ld r30, 232(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f31, 296(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f30, 288(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f29, 280(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r30, 224(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r29, 216(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r28, 208(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r27, 200(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r26, 192(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r25, 184(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r24, 176(r1) # 8-byte Folded Reload
; BE-NEXT: lxvd2x v2, 0, r3
; BE-NEXT: addi r3, r1, 128
-; BE-NEXT: lfd f28, 272(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f27, 264(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f26, 256(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r29, 224(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r28, 216(r1) # 8-byte Folded Reload
; BE-NEXT: lxvd2x v3, 0, r3
; BE-NEXT: addi r3, r1, 144
-; BE-NEXT: lfd f25, 248(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r27, 208(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r26, 200(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r25, 192(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r24, 184(r1) # 8-byte Folded Reload
; BE-NEXT: lxvd2x v4, 0, r3
; BE-NEXT: addi r3, r1, 160
; BE-NEXT: lxvd2x v5, 0, r3
-; BE-NEXT: addi r1, r1, 304
+; BE-NEXT: addi r1, r1, 240
; BE-NEXT: ld r0, 16(r1)
; BE-NEXT: mtlr r0
; BE-NEXT: blr
@@ -573,159 +428,107 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) nounwind {
; CHECK-LABEL: lrint_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stdu r1, -240(r1)
-; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: std r0, 256(r1)
-; CHECK-NEXT: std r24, 120(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r25, 128(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r26, 136(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r27, 144(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r28, 152(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r29, 160(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: std r30, 168(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f25, 184(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f25, f2
-; CHECK-NEXT: stfd f26, 192(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f26, f3
-; CHECK-NEXT: stfd f27, 200(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f27, f4
-; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 80
-; CHECK-NEXT: stfd f28, 208(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f28, f5
-; CHECK-NEXT: stfd f29, 216(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f29, f6
-; CHECK-NEXT: stfd f30, 224(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f30, f7
-; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 96
-; CHECK-NEXT: stfd f31, 232(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f31, f8
-; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f25
-; CHECK-NEXT: mr r30, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f26
-; CHECK-NEXT: mr r29, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f27
-; CHECK-NEXT: mr r28, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f28
-; CHECK-NEXT: mr r27, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f29
-; CHECK-NEXT: mr r26, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f30
-; CHECK-NEXT: mr r25, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
-; CHECK-NEXT: mr r24, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
+; CHECK-NEXT: stdu r1, -192(r1)
+; CHECK-NEXT: li r11, 48
+; CHECK-NEXT: std r0, 208(r1)
; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: std r23, 120(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r24, 128(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r25, 136(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r25, r5
+; CHECK-NEXT: mr r24, r4
+; CHECK-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 64
+; CHECK-NEXT: std r26, 144(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r26, r6
+; CHECK-NEXT: std r27, 152(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r28, 160(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r28, r8
+; CHECK-NEXT: mr r27, r7
+; CHECK-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 80
+; CHECK-NEXT: std r29, 168(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r29, r9
+; CHECK-NEXT: std r30, 176(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r30, r10
+; CHECK-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 96
+; CHECK-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
+; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: nop
+; CHECK-NEXT: mr r23, r3
; CHECK-NEXT: clrldi r3, r24, 48
-; CHECK-NEXT: fmr f31, f1
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v31, r23
+; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: clrldi r3, r25, 48
-; CHECK-NEXT: fmr f30, f1
+; CHECK-NEXT: xxmrghd v31, vs0, v31
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
+; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: nop
+; CHECK-NEXT: mr r25, r3
; CHECK-NEXT: clrldi r3, r26, 48
-; CHECK-NEXT: fmr f29, f1
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r27, 48
-; CHECK-NEXT: fmr f28, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: mtvsrd v30, r25
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r28, 48
-; CHECK-NEXT: fmr f27, f1
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: clrldi r3, r27, 48
+; CHECK-NEXT: xxmrghd v30, vs0, v30
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r29, 48
-; CHECK-NEXT: fmr f26, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r30, 48
-; CHECK-NEXT: fmr f25, f1
+; CHECK-NEXT: mr r27, r3
+; CHECK-NEXT: clrldi r3, r28, 48
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v29, r27
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f25
-; CHECK-NEXT: mtvsrd v31, r3
-; CHECK-NEXT: bl lrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f26
; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v31, vs0, v31
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: clrldi r3, r29, 48
+; CHECK-NEXT: xxmrghd v29, vs0, v29
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f27
-; CHECK-NEXT: mtvsrd v30, r3
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f28
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v30, vs0, v30
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: clrldi r3, r30, 48
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f29
-; CHECK-NEXT: mtvsrd v29, r3
-; CHECK-NEXT: bl lrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f30
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v29, vs0, v29
-; CHECK-NEXT: bl lrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
-; CHECK-NEXT: mtvsrd v28, r3
+; CHECK-NEXT: mtvsrd v28, r29
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: li r3, 96
; CHECK-NEXT: vmr v2, v31
-; CHECK-NEXT: lfd f31, 232(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r30, 176(r1) # 8-byte Folded Reload
; CHECK-NEXT: vmr v3, v30
; CHECK-NEXT: vmr v4, v29
-; CHECK-NEXT: lfd f30, 224(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f29, 216(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r29, 168(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r28, 160(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 80
-; CHECK-NEXT: lfd f28, 208(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f27, 200(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f26, 192(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f25, 184(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r30, 168(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r29, 160(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r27, 152(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r26, 144(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r25, 136(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r24, 128(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r23, 120(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: ld r28, 152(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r27, 144(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxmrghd v5, vs0, v28
-; CHECK-NEXT: ld r26, 136(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r25, 128(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r24, 120(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
+; CHECK-NEXT: xxmrghd v5, vs0, v28
; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: addi r1, r1, 240
+; CHECK-NEXT: addi r1, r1, 192
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
@@ -733,117 +536,103 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) nounwind {
; FAST-LABEL: lrint_v8f16:
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
-; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
-; FAST-NEXT: stdu r1, -96(r1)
-; FAST-NEXT: fmr f24, f1
-; FAST-NEXT: fmr f1, f8
-; FAST-NEXT: std r0, 112(r1)
-; FAST-NEXT: fmr f30, f7
-; FAST-NEXT: fmr f29, f6
-; FAST-NEXT: fmr f28, f5
-; FAST-NEXT: fmr f27, f4
-; FAST-NEXT: fmr f26, f3
-; FAST-NEXT: fmr f25, f2
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
+; FAST-NEXT: stdu r1, -176(r1)
+; FAST-NEXT: li r11, 48
+; FAST-NEXT: std r0, 192(r1)
; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: std r24, 112(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r25, 120(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r26, 128(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r26, r6
+; FAST-NEXT: mr r25, r5
+; FAST-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 64
+; FAST-NEXT: std r27, 136(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r27, r7
+; FAST-NEXT: std r28, 144(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r29, 152(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r29, r9
+; FAST-NEXT: mr r28, r8
+; FAST-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 80
+; FAST-NEXT: std r30, 160(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r30, r10
+; FAST-NEXT: mr r24, r4
+; FAST-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 96
+; FAST-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f31, f1
-; FAST-NEXT: fmr f1, f30
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: clrldi r3, r24, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f30, f1
-; FAST-NEXT: fmr f1, f29
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: clrldi r3, r25, 48
+; FAST-NEXT: xxmrghd v31, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f29, f1
-; FAST-NEXT: fmr f1, f28
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v30, r3
+; FAST-NEXT: clrldi r3, r26, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f28, f1
-; FAST-NEXT: fmr f1, f27
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: clrldi r3, r27, 48
+; FAST-NEXT: xxmrghd v30, vs0, v30
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f27, f1
-; FAST-NEXT: fmr f1, f26
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v29, r3
+; FAST-NEXT: clrldi r3, r28, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f26, f1
-; FAST-NEXT: fmr f1, f25
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: clrldi r3, r29, 48
+; FAST-NEXT: xxmrghd v29, vs0, v29
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f25, f1
-; FAST-NEXT: fmr f1, f24
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v28, r3
+; FAST-NEXT: clrldi r3, r30, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fctid f0, f25
-; FAST-NEXT: fctid f2, f26
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: fctid f3, f27
-; FAST-NEXT: fctid f4, f28
-; FAST-NEXT: fctid f5, f29
-; FAST-NEXT: fctid f6, f30
-; FAST-NEXT: fctid f1, f1
-; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: mffprd r3, f2
-; FAST-NEXT: mtfprd f2, r3
-; FAST-NEXT: mffprd r3, f3
-; FAST-NEXT: mtfprd f3, r3
-; FAST-NEXT: mffprd r3, f4
-; FAST-NEXT: mtfprd f4, r3
-; FAST-NEXT: mffprd r3, f5
-; FAST-NEXT: mtfprd f5, r3
-; FAST-NEXT: mffprd r3, f6
-; FAST-NEXT: mtfprd f6, r3
-; FAST-NEXT: mffprd r3, f1
-; FAST-NEXT: mtfprd f1, r3
-; FAST-NEXT: xxmrghd v3, vs3, vs2
-; FAST-NEXT: xxmrghd v4, vs5, vs4
-; FAST-NEXT: xxmrghd v2, vs0, vs1
-; FAST-NEXT: fctid f0, f31
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: vmr v2, v31
+; FAST-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r29, 152(r1) # 8-byte Folded Reload
+; FAST-NEXT: vmr v3, v30
+; FAST-NEXT: vmr v4, v29
+; FAST-NEXT: ld r28, 144(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r27, 136(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r26, 128(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r25, 120(r1) # 8-byte Folded Reload
; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: ld r24, 112(r1) # 8-byte Folded Reload
; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: xxmrghd v5, vs0, vs6
-; FAST-NEXT: addi r1, r1, 96
+; FAST-NEXT: li r3, 96
+; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 80
+; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: xxmrghd v5, vs0, v28
+; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: addi r1, r1, 176
; FAST-NEXT: ld r0, 16(r1)
-; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; FAST-NEXT: mtlr r0
-; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f25, -56(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f24, -64(r1) # 8-byte Folded Reload
; FAST-NEXT: blr
%a = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> %x)
ret <8 x i64> %a
@@ -854,286 +643,166 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) nounwind {
; BE-LABEL: lrint_v16i64_v16f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
-; BE-NEXT: stdu r1, -496(r1)
-; BE-NEXT: std r0, 512(r1)
-; BE-NEXT: stfd f20, 400(r1) # 8-byte Folded Spill
-; BE-NEXT: fmr f20, f1
-; BE-NEXT: fmr f1, f2
-; BE-NEXT: std r16, 248(r1) # 8-byte Folded Spill
-; BE-NEXT: std r17, 256(r1) # 8-byte Folded Spill
-; BE-NEXT: std r18, 264(r1) # 8-byte Folded Spill
-; BE-NEXT: std r19, 272(r1) # 8-byte Folded Spill
-; BE-NEXT: std r20, 280(r1) # 8-byte Folded Spill
-; BE-NEXT: std r21, 288(r1) # 8-byte Folded Spill
-; BE-NEXT: std r22, 296(r1) # 8-byte Folded Spill
-; BE-NEXT: std r23, 304(r1) # 8-byte Folded Spill
-; BE-NEXT: std r24, 312(r1) # 8-byte Folded Spill
-; BE-NEXT: std r25, 320(r1) # 8-byte Folded Spill
-; BE-NEXT: std r26, 328(r1) # 8-byte Folded Spill
-; BE-NEXT: std r27, 336(r1) # 8-byte Folded Spill
-; BE-NEXT: std r28, 344(r1) # 8-byte Folded Spill
-; BE-NEXT: std r29, 352(r1) # 8-byte Folded Spill
-; BE-NEXT: std r30, 360(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f17, 376(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f18, 384(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f19, 392(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f21, 408(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f22, 416(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f23, 424(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f24, 432(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f25, 440(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f26, 448(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f27, 456(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f28, 464(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f29, 472(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f30, 480(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f31, 488(r1) # 8-byte Folded Spill
-; BE-NEXT: fmr f31, f13
-; BE-NEXT: fmr f29, f12
-; BE-NEXT: fmr f30, f11
-; BE-NEXT: fmr f28, f10
-; BE-NEXT: fmr f27, f9
-; BE-NEXT: fmr f26, f8
-; BE-NEXT: fmr f25, f7
-; BE-NEXT: fmr f24, f6
-; BE-NEXT: fmr f23, f5
-; BE-NEXT: fmr f22, f4
-; BE-NEXT: fmr f21, f3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f20
-; BE-NEXT: mr r30, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f22
-; BE-NEXT: mr r29, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f21
-; BE-NEXT: mr r28, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f24
-; BE-NEXT: mr r27, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f23
-; BE-NEXT: mr r26, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f26
-; BE-NEXT: mr r25, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f25
+; BE-NEXT: stdu r1, -368(r1)
+; BE-NEXT: std r0, 384(r1)
+; BE-NEXT: std r24, 304(r1) # 8-byte Folded Spill
; BE-NEXT: mr r24, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f28
-; BE-NEXT: mr r23, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f27
-; BE-NEXT: mr r22, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f29
-; BE-NEXT: mr r21, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f30
-; BE-NEXT: mr r20, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 652(r1)
-; BE-NEXT: mr r19, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
-; BE-NEXT: mr r18, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 668(r1)
-; BE-NEXT: mr r17, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 660(r1)
-; BE-NEXT: mr r16, r3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r16, 48
-; BE-NEXT: fmr f31, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r17, 48
-; BE-NEXT: fmr f30, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r18, 48
-; BE-NEXT: fmr f29, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r19, 48
-; BE-NEXT: fmr f28, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r20, 48
-; BE-NEXT: fmr f27, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r21, 48
-; BE-NEXT: fmr f26, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r22, 48
-; BE-NEXT: fmr f25, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r23, 48
-; BE-NEXT: fmr f24, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r24, 48
-; BE-NEXT: fmr f23, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r25, 48
-; BE-NEXT: fmr f22, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r26, 48
-; BE-NEXT: fmr f21, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r27, 48
-; BE-NEXT: fmr f20, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r28, 48
-; BE-NEXT: fmr f19, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r29, 48
-; BE-NEXT: fmr f18, f1
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r30, 48
-; BE-NEXT: fmr f17, f1
+; BE-NEXT: lhz r3, 494(r1)
+; BE-NEXT: std r16, 240(r1) # 8-byte Folded Spill
+; BE-NEXT: lhz r16, 486(r1)
+; BE-NEXT: std r17, 248(r1) # 8-byte Folded Spill
+; BE-NEXT: std r18, 256(r1) # 8-byte Folded Spill
+; BE-NEXT: std r19, 264(r1) # 8-byte Folded Spill
+; BE-NEXT: std r20, 272(r1) # 8-byte Folded Spill
+; BE-NEXT: std r21, 280(r1) # 8-byte Folded Spill
+; BE-NEXT: std r22, 288(r1) # 8-byte Folded Spill
+; BE-NEXT: lhz r22, 534(r1)
+; BE-NEXT: lhz r21, 542(r1)
+; BE-NEXT: lhz r20, 518(r1)
+; BE-NEXT: lhz r19, 526(r1)
+; BE-NEXT: lhz r18, 502(r1)
+; BE-NEXT: lhz r17, 510(r1)
+; BE-NEXT: std r23, 296(r1) # 8-byte Folded Spill
+; BE-NEXT: std r25, 312(r1) # 8-byte Folded Spill
+; BE-NEXT: std r26, 320(r1) # 8-byte Folded Spill
+; BE-NEXT: std r27, 328(r1) # 8-byte Folded Spill
+; BE-NEXT: std r28, 336(r1) # 8-byte Folded Spill
+; BE-NEXT: std r29, 344(r1) # 8-byte Folded Spill
+; BE-NEXT: std r30, 352(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r29, r10
+; BE-NEXT: mr r30, r9
+; BE-NEXT: mr r27, r8
+; BE-NEXT: mr r28, r7
+; BE-NEXT: mr r25, r6
+; BE-NEXT: mr r26, r5
+; BE-NEXT: mr r23, r4
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f17
; BE-NEXT: std r3, 120(r1)
+; BE-NEXT: mr r3, r16
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f18
; BE-NEXT: std r3, 112(r1)
+; BE-NEXT: mr r3, r17
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f19
; BE-NEXT: std r3, 136(r1)
+; BE-NEXT: mr r3, r18
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f20
; BE-NEXT: std r3, 128(r1)
+; BE-NEXT: mr r3, r19
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f21
; BE-NEXT: std r3, 152(r1)
+; BE-NEXT: mr r3, r20
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f22
; BE-NEXT: std r3, 144(r1)
+; BE-NEXT: mr r3, r21
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f23
; BE-NEXT: std r3, 168(r1)
+; BE-NEXT: mr r3, r22
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f24
; BE-NEXT: std r3, 160(r1)
+; BE-NEXT: clrldi r3, r23, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f25
; BE-NEXT: std r3, 184(r1)
+; BE-NEXT: clrldi r3, r24, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f26
; BE-NEXT: std r3, 176(r1)
+; BE-NEXT: clrldi r3, r25, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f27
; BE-NEXT: std r3, 200(r1)
+; BE-NEXT: clrldi r3, r26, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f28
; BE-NEXT: std r3, 192(r1)
+; BE-NEXT: clrldi r3, r27, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f29
; BE-NEXT: std r3, 216(r1)
+; BE-NEXT: clrldi r3, r28, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f30
; BE-NEXT: std r3, 208(r1)
+; BE-NEXT: clrldi r3, r29, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
; BE-NEXT: std r3, 232(r1)
+; BE-NEXT: clrldi r3, r30, 48
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
; BE-NEXT: std r3, 224(r1)
; BE-NEXT: addi r3, r1, 112
-; BE-NEXT: ld r30, 360(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f31, 488(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f30, 480(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f29, 472(r1) # 8-byte Folded Reload
-; BE-NEXT: lxvd2x v2, 0, r3
+; BE-NEXT: ld r30, 352(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r29, 344(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r28, 336(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r27, 328(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r26, 320(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r25, 312(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r24, 304(r1) # 8-byte Folded Reload
+; BE-NEXT: lxvd2x v6, 0, r3
; BE-NEXT: addi r3, r1, 128
-; BE-NEXT: lfd f28, 464(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f27, 456(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f26, 448(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r29, 352(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r28, 344(r1) # 8-byte Folded Reload
-; BE-NEXT: lxvd2x v3, 0, r3
+; BE-NEXT: ld r23, 296(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r22, 288(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r21, 280(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r20, 272(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r19, 264(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r18, 256(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r17, 248(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r16, 240(r1) # 8-byte Folded Reload
+; BE-NEXT: lxvd2x v7, 0, r3
; BE-NEXT: addi r3, r1, 144
-; BE-NEXT: lfd f25, 440(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f24, 432(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f23, 424(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r27, 336(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r26, 328(r1) # 8-byte Folded Reload
-; BE-NEXT: lxvd2x v4, 0, r3
+; BE-NEXT: lxvd2x v8, 0, r3
; BE-NEXT: addi r3, r1, 160
-; BE-NEXT: lfd f22, 416(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f21, 408(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f20, 400(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r25, 320(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r24, 312(r1) # 8-byte Folded Reload
-; BE-NEXT: lxvd2x v5, 0, r3
+; BE-NEXT: lxvd2x v9, 0, r3
; BE-NEXT: addi r3, r1, 176
-; BE-NEXT: lfd f19, 392(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f18, 384(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f17, 376(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r23, 304(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r22, 296(r1) # 8-byte Folded Reload
-; BE-NEXT: lxvd2x v6, 0, r3
+; BE-NEXT: lxvd2x v2, 0, r3
; BE-NEXT: addi r3, r1, 192
-; BE-NEXT: ld r21, 288(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r20, 280(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r19, 272(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r18, 264(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r17, 256(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r16, 248(r1) # 8-byte Folded Reload
-; BE-NEXT: lxvd2x v7, 0, r3
+; BE-NEXT: lxvd2x v3, 0, r3
; BE-NEXT: addi r3, r1, 208
-; BE-NEXT: lxvd2x v8, 0, r3
+; BE-NEXT: lxvd2x v4, 0, r3
; BE-NEXT: addi r3, r1, 224
-; BE-NEXT: lxvd2x v9, 0, r3
-; BE-NEXT: addi r1, r1, 496
+; BE-NEXT: lxvd2x v5, 0, r3
+; BE-NEXT: addi r1, r1, 368
; BE-NEXT: ld r0, 16(r1)
; BE-NEXT: mtlr r0
; BE-NEXT: blr
@@ -1141,316 +810,207 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) nounwind {
; CHECK-LABEL: lrint_v16i64_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stdu r1, -432(r1)
-; CHECK-NEXT: li r3, 48
-; CHECK-NEXT: std r0, 448(r1)
-; CHECK-NEXT: std r16, 184(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r17, 192(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r18, 200(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r19, 208(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r20, 216(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r21, 224(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: std r22, 232(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r23, 240(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r24, 248(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r25, 256(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r26, 264(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r27, 272(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 80
-; CHECK-NEXT: std r28, 280(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r29, 288(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r30, 296(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f17, 312(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f18, 320(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f19, 328(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 96
-; CHECK-NEXT: stfd f20, 336(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f20, f2
-; CHECK-NEXT: stfd f21, 344(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f21, f3
-; CHECK-NEXT: stfd f22, 352(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f22, f4
-; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 112
-; CHECK-NEXT: stfd f23, 360(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f23, f5
-; CHECK-NEXT: stfd f24, 368(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f24, f6
-; CHECK-NEXT: stfd f25, 376(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f25, f7
-; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 128
-; CHECK-NEXT: stfd f26, 384(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f26, f8
-; CHECK-NEXT: stfd f27, 392(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f27, f9
-; CHECK-NEXT: stfd f28, 400(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f28, f10
-; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 144
-; CHECK-NEXT: stfd f29, 408(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f29, f11
-; CHECK-NEXT: stfd f30, 416(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f30, f12
-; CHECK-NEXT: stfd f31, 424(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f31, f13
-; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: li r3, 160
-; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f20
-; CHECK-NEXT: mr r30, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f21
-; CHECK-NEXT: mr r29, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f22
-; CHECK-NEXT: mr r28, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f23
-; CHECK-NEXT: mr r27, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f24
-; CHECK-NEXT: mr r26, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f25
-; CHECK-NEXT: mr r25, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f26
-; CHECK-NEXT: mr r24, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f27
+; CHECK-NEXT: stdu r1, -320(r1)
+; CHECK-NEXT: li r11, 48
+; CHECK-NEXT: std r0, 336(r1)
+; CHECK-NEXT: std r23, 248(r1) # 8-byte Folded Spill
; CHECK-NEXT: mr r23, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: lhz r3, 416(r1)
+; CHECK-NEXT: std r16, 192(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r17, 200(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r17, 432(r1)
+; CHECK-NEXT: stxvd2x v24, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 64
+; CHECK-NEXT: std r18, 208(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r18, 440(r1)
+; CHECK-NEXT: std r19, 216(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r19, 448(r1)
+; CHECK-NEXT: lhz r16, 424(r1)
+; CHECK-NEXT: stxvd2x v25, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 80
+; CHECK-NEXT: std r20, 224(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r20, 456(r1)
+; CHECK-NEXT: stxvd2x v26, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 96
+; CHECK-NEXT: std r21, 232(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r22, 240(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r22, 472(r1)
+; CHECK-NEXT: lhz r21, 464(r1)
+; CHECK-NEXT: stxvd2x v27, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 112
+; CHECK-NEXT: std r15, 184(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r24, 256(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r25, 264(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r25, r5
+; CHECK-NEXT: mr r24, r4
+; CHECK-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 128
+; CHECK-NEXT: std r26, 272(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r27, 280(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r28, 288(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r29, 296(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r29, r9
+; CHECK-NEXT: mr r28, r8
+; CHECK-NEXT: mr r27, r7
+; CHECK-NEXT: mr r26, r6
+; CHECK-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 144
+; CHECK-NEXT: std r30, 304(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r30, r10
+; CHECK-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 160
+; CHECK-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f28
-; CHECK-NEXT: mr r22, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f29
-; CHECK-NEXT: mr r21, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: mr r15, r3
+; CHECK-NEXT: mr r3, r16
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f30
-; CHECK-NEXT: mr r20, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: mtvsrd v31, r15
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
-; CHECK-NEXT: mr r19, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: mr r3, r17
+; CHECK-NEXT: xxmrghd v31, vs0, v31
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 568(r1)
-; CHECK-NEXT: mr r18, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 576(r1)
; CHECK-NEXT: mr r17, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: mr r3, r18
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 584(r1)
-; CHECK-NEXT: mr r16, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: mtvsrd v30, r17
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: mr r3, r19
+; CHECK-NEXT: xxmrghd v30, vs0, v30
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r16, 48
-; CHECK-NEXT: fmr f31, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r17, 48
-; CHECK-NEXT: fmr f30, f1
+; CHECK-NEXT: mr r19, r3
+; CHECK-NEXT: mr r3, r20
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r18, 48
-; CHECK-NEXT: fmr f29, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: mtvsrd v29, r19
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r19, 48
-; CHECK-NEXT: fmr f28, f1
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: mr r3, r21
+; CHECK-NEXT: xxmrghd v29, vs0, v29
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r20, 48
-; CHECK-NEXT: fmr f27, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r21, 48
-; CHECK-NEXT: fmr f26, f1
+; CHECK-NEXT: mr r21, r3
+; CHECK-NEXT: mr r3, r22
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r22, 48
-; CHECK-NEXT: fmr f25, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: mtvsrd v28, r21
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: clrldi r3, r23, 48
-; CHECK-NEXT: fmr f24, f1
+; CHECK-NEXT: xxmrghd v28, vs0, v28
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
+; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: nop
+; CHECK-NEXT: mr r23, r3
; CHECK-NEXT: clrldi r3, r24, 48
-; CHECK-NEXT: fmr f23, f1
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
+; CHECK-NEXT: mtvsrd v27, r23
+; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: nop
+; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: clrldi r3, r25, 48
-; CHECK-NEXT: fmr f22, f1
+; CHECK-NEXT: xxmrghd v27, vs0, v27
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r26, 48
-; CHECK-NEXT: fmr f21, f1
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r27, 48
-; CHECK-NEXT: fmr f20, f1
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r28, 48
-; CHECK-NEXT: fmr f19, f1
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r29, 48
-; CHECK-NEXT: fmr f18, f1
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r30, 48
-; CHECK-NEXT: fmr f17, f1
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: bl lrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f17
-; CHECK-NEXT: mtvsrd v31, r3
-; CHECK-NEXT: bl lrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f18
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v31, vs0, v31
-; CHECK-NEXT: bl lrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f19
-; CHECK-NEXT: mtvsrd v30, r3
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f20
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v30, vs0, v30
-; CHECK-NEXT: bl lrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f21
-; CHECK-NEXT: mtvsrd v29, r3
-; CHECK-NEXT: bl lrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f22
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v29, vs0, v29
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: mr r25, r3
+; CHECK-NEXT: clrldi r3, r26, 48
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f23
-; CHECK-NEXT: mtvsrd v28, r3
+; CHECK-NEXT: mtvsrd v26, r25
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f24
; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v28, vs0, v28
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: clrldi r3, r27, 48
+; CHECK-NEXT: xxmrghd v26, vs0, v26
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f25
-; CHECK-NEXT: mtvsrd v27, r3
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f26
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v27, vs0, v27
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: mr r27, r3
+; CHECK-NEXT: clrldi r3, r28, 48
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f27
-; CHECK-NEXT: mtvsrd v26, r3
+; CHECK-NEXT: mtvsrd v25, r27
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f28
; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v26, vs0, v26
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: clrldi r3, r29, 48
+; CHECK-NEXT: xxmrghd v25, vs0, v25
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f29
-; CHECK-NEXT: mtvsrd v25, r3
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f30
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v25, vs0, v25
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: clrldi r3, r30, 48
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
-; CHECK-NEXT: mtvsrd v24, r3
+; CHECK-NEXT: mtvsrd v24, r29
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: li r3, 160
-; CHECK-NEXT: vmr v2, v31
-; CHECK-NEXT: lfd f31, 424(r1) # 8-byte Folded Reload
-; CHECK-NEXT: vmr v3, v30
-; CHECK-NEXT: vmr v4, v29
-; CHECK-NEXT: lfd f30, 416(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f29, 408(r1) # 8-byte Folded Reload
+; CHECK-NEXT: vmr v6, v31
+; CHECK-NEXT: ld r30, 304(r1) # 8-byte Folded Reload
+; CHECK-NEXT: vmr v7, v30
+; CHECK-NEXT: vmr v8, v29
+; CHECK-NEXT: ld r29, 296(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r28, 288(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 144
-; CHECK-NEXT: vmr v5, v28
-; CHECK-NEXT: vmr v6, v27
-; CHECK-NEXT: vmr v7, v26
-; CHECK-NEXT: vmr v8, v25
-; CHECK-NEXT: lfd f28, 400(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f27, 392(r1) # 8-byte Folded Reload
+; CHECK-NEXT: vmr v9, v28
+; CHECK-NEXT: vmr v2, v27
+; CHECK-NEXT: vmr v3, v26
+; CHECK-NEXT: vmr v4, v25
+; CHECK-NEXT: ld r27, 280(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r26, 272(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 128
-; CHECK-NEXT: lfd f26, 384(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f25, 376(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxmrghd v9, vs0, v24
-; CHECK-NEXT: lfd f24, 368(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f23, 360(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f22, 352(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r25, 264(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r24, 256(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghd v5, vs0, v24
+; CHECK-NEXT: ld r23, 248(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r22, 240(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r21, 232(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 112
-; CHECK-NEXT: lfd f21, 344(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r30, 296(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f20, 336(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f19, 328(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r29, 288(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r28, 280(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r20, 224(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r19, 216(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r18, 208(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r17, 200(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r16, 192(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r15, 184(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 96
-; CHECK-NEXT: lfd f18, 320(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r27, 272(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f17, 312(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r26, 264(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r25, 256(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r24, 248(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 80
-; CHECK-NEXT: ld r23, 240(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r22, 232(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r21, 224(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r20, 216(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r19, 208(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r18, 200(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: ld r17, 192(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r16, 184(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
; CHECK-NEXT: li r3, 48
; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: addi r1, r1, 432
+; CHECK-NEXT: addi r1, r1, 320
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
@@ -1458,223 +1018,199 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) nounwind {
; FAST-LABEL: lrint_v16i64_v16f16:
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
-; FAST-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f19, -104(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f20, -96(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f21, -88(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f22, -80(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f23, -72(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
-; FAST-NEXT: stdu r1, -160(r1)
-; FAST-NEXT: fmr f26, f1
-; FAST-NEXT: lfs f1, 312(r1)
-; FAST-NEXT: std r0, 176(r1)
-; FAST-NEXT: fmr f28, f13
-; FAST-NEXT: fmr f27, f12
-; FAST-NEXT: fmr f24, f11
-; FAST-NEXT: fmr f21, f10
-; FAST-NEXT: fmr f19, f9
-; FAST-NEXT: fmr f18, f8
-; FAST-NEXT: fmr f17, f7
-; FAST-NEXT: fmr f16, f6
-; FAST-NEXT: fmr f20, f5
-; FAST-NEXT: fmr f22, f4
-; FAST-NEXT: fmr f23, f3
-; FAST-NEXT: fmr f25, f2
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: stdu r1, -304(r1)
+; FAST-NEXT: li r11, 48
+; FAST-NEXT: std r0, 320(r1)
+; FAST-NEXT: std r23, 232(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r23, r3
+; FAST-NEXT: lhz r3, 400(r1)
+; FAST-NEXT: std r16, 176(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r17, 184(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r17, 416(r1)
+; FAST-NEXT: stxvd2x v24, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 64
+; FAST-NEXT: std r18, 192(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r18, 424(r1)
+; FAST-NEXT: std r19, 200(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r19, 432(r1)
+; FAST-NEXT: lhz r16, 408(r1)
+; FAST-NEXT: stxvd2x v25, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 80
+; FAST-NEXT: std r20, 208(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r20, 440(r1)
+; FAST-NEXT: stxvd2x v26, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 96
+; FAST-NEXT: std r21, 216(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r22, 224(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r22, 456(r1)
+; FAST-NEXT: lhz r21, 448(r1)
+; FAST-NEXT: stxvd2x v27, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 112
+; FAST-NEXT: std r24, 240(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r25, 248(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r26, 256(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r26, r6
+; FAST-NEXT: mr r25, r5
+; FAST-NEXT: mr r24, r4
+; FAST-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 128
+; FAST-NEXT: std r27, 264(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r28, 272(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r29, 280(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r30, 288(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r30, r10
+; FAST-NEXT: mr r29, r9
+; FAST-NEXT: mr r28, r8
+; FAST-NEXT: mr r27, r7
+; FAST-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 144
+; FAST-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 160
+; FAST-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f31, f1
-; FAST-NEXT: lfs f1, 304(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: mr r3, r16
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f30, f1
-; FAST-NEXT: lfs f1, 296(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: mr r3, r17
+; FAST-NEXT: xxmrghd v31, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f29, f1
-; FAST-NEXT: fmr f1, f28
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v30, r3
+; FAST-NEXT: mr r3, r18
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f28, f1
-; FAST-NEXT: fmr f1, f27
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: mr r3, r19
+; FAST-NEXT: xxmrghd v30, vs0, v30
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f27, f1
-; FAST-NEXT: fmr f1, f24
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v29, r3
+; FAST-NEXT: mr r3, r20
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f24, f1
-; FAST-NEXT: fmr f1, f21
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: mr r3, r21
+; FAST-NEXT: xxmrghd v29, vs0, v29
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f21, f1
-; FAST-NEXT: fmr f1, f19
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v28, r3
+; FAST-NEXT: mr r3, r22
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f19, f1
-; FAST-NEXT: fmr f1, f18
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: clrldi r3, r23, 48
+; FAST-NEXT: xxmrghd v28, vs0, v28
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f18, f1
-; FAST-NEXT: fmr f1, f17
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v27, r3
+; FAST-NEXT: clrldi r3, r24, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f17, f1
-; FAST-NEXT: fmr f1, f16
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: clrldi r3, r25, 48
+; FAST-NEXT: xxmrghd v27, vs0, v27
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f16, f1
-; FAST-NEXT: fmr f1, f20
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v26, r3
+; FAST-NEXT: clrldi r3, r26, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f20, f1
-; FAST-NEXT: fmr f1, f22
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: clrldi r3, r27, 48
+; FAST-NEXT: xxmrghd v26, vs0, v26
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f22, f1
-; FAST-NEXT: fmr f1, f23
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v25, r3
+; FAST-NEXT: clrldi r3, r28, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f23, f1
-; FAST-NEXT: fmr f1, f25
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: clrldi r3, r29, 48
+; FAST-NEXT: xxmrghd v25, vs0, v25
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f25, f1
-; FAST-NEXT: fmr f1, f26
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v24, r3
+; FAST-NEXT: clrldi r3, r30, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fctid f0, f25
-; FAST-NEXT: fctid f2, f23
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: fctid f3, f22
-; FAST-NEXT: fctid f4, f20
-; FAST-NEXT: fctid f5, f16
-; FAST-NEXT: fctid f6, f17
-; FAST-NEXT: fctid f7, f18
-; FAST-NEXT: fctid f8, f19
-; FAST-NEXT: fctid f9, f21
-; FAST-NEXT: fctid f10, f24
-; FAST-NEXT: fctid f1, f1
-; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: mffprd r3, f2
-; FAST-NEXT: mtfprd f2, r3
-; FAST-NEXT: mffprd r3, f3
-; FAST-NEXT: mtfprd f3, r3
-; FAST-NEXT: mffprd r3, f4
-; FAST-NEXT: mtfprd f4, r3
-; FAST-NEXT: mffprd r3, f5
-; FAST-NEXT: mtfprd f5, r3
-; FAST-NEXT: mffprd r3, f6
-; FAST-NEXT: mtfprd f6, r3
-; FAST-NEXT: mffprd r3, f7
-; FAST-NEXT: mtfprd f7, r3
-; FAST-NEXT: mffprd r3, f8
-; FAST-NEXT: mtfprd f8, r3
-; FAST-NEXT: mffprd r3, f9
-; FAST-NEXT: mtfprd f9, r3
-; FAST-NEXT: mffprd r3, f10
-; FAST-NEXT: mtfprd f10, r3
-; FAST-NEXT: mffprd r3, f1
-; FAST-NEXT: mtfprd f1, r3
-; FAST-NEXT: xxmrghd v3, vs3, vs2
-; FAST-NEXT: xxmrghd v4, vs5, vs4
-; FAST-NEXT: xxmrghd v5, vs7, vs6
-; FAST-NEXT: xxmrghd v6, vs9, vs8
-; FAST-NEXT: xxmrghd v2, vs0, vs1
-; FAST-NEXT: fctid f0, f27
-; FAST-NEXT: fctid f1, f29
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: xxmrghd v7, vs0, vs10
-; FAST-NEXT: fctid f0, f28
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: mffprd r3, f1
-; FAST-NEXT: mtfprd f1, r3
-; FAST-NEXT: xxmrghd v8, vs1, vs0
-; FAST-NEXT: fctid f0, f30
-; FAST-NEXT: fctid f1, f31
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: vmr v6, v31
+; FAST-NEXT: ld r30, 288(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r29, 280(r1) # 8-byte Folded Reload
+; FAST-NEXT: vmr v7, v30
+; FAST-NEXT: vmr v8, v29
+; FAST-NEXT: ld r28, 272(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r27, 264(r1) # 8-byte Folded Reload
+; FAST-NEXT: vmr v9, v28
+; FAST-NEXT: vmr v2, v27
+; FAST-NEXT: ld r26, 256(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r25, 248(r1) # 8-byte Folded Reload
+; FAST-NEXT: vmr v3, v26
+; FAST-NEXT: vmr v4, v25
+; FAST-NEXT: ld r24, 240(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r23, 232(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r22, 224(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r21, 216(r1) # 8-byte Folded Reload
; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: ld r20, 208(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r19, 200(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r18, 192(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r17, 184(r1) # 8-byte Folded Reload
; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: mffprd r3, f1
-; FAST-NEXT: mtfprd f1, r3
-; FAST-NEXT: xxmrghd v9, vs1, vs0
-; FAST-NEXT: addi r1, r1, 160
+; FAST-NEXT: li r3, 160
+; FAST-NEXT: ld r16, 176(r1) # 8-byte Folded Reload
+; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 144
+; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 128
+; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 112
+; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 96
+; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 80
+; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 64
+; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: li r3, 48
+; FAST-NEXT: xxmrghd v5, vs0, v24
+; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT: addi r1, r1, 304
; FAST-NEXT: ld r0, 16(r1)
-; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; FAST-NEXT: mtlr r0
-; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f25, -56(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f24, -64(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f23, -72(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f22, -80(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f21, -88(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f20, -96(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f19, -104(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f18, -112(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f17, -120(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f16, -128(r1) # 8-byte Folded Reload
; FAST-NEXT: blr
%a = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> %x)
ret <16 x i64> %a
@@ -1685,483 +1221,295 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind {
; BE-LABEL: lrint_v32i64_v32f16:
; BE: # %bb.0:
; BE-NEXT: mflr r0
-; BE-NEXT: stdu r1, -864(r1)
-; BE-NEXT: std r0, 880(r1)
-; BE-NEXT: stfd f20, 768(r1) # 8-byte Folded Spill
-; BE-NEXT: fmr f20, f1
-; BE-NEXT: fmr f1, f2
-; BE-NEXT: std r14, 576(r1) # 8-byte Folded Spill
-; BE-NEXT: std r15, 584(r1) # 8-byte Folded Spill
-; BE-NEXT: std r16, 592(r1) # 8-byte Folded Spill
-; BE-NEXT: std r17, 600(r1) # 8-byte Folded Spill
-; BE-NEXT: std r18, 608(r1) # 8-byte Folded Spill
-; BE-NEXT: std r19, 616(r1) # 8-byte Folded Spill
-; BE-NEXT: std r20, 624(r1) # 8-byte Folded Spill
-; BE-NEXT: std r21, 632(r1) # 8-byte Folded Spill
-; BE-NEXT: std r22, 640(r1) # 8-byte Folded Spill
-; BE-NEXT: std r23, 648(r1) # 8-byte Folded Spill
-; BE-NEXT: std r24, 656(r1) # 8-byte Folded Spill
-; BE-NEXT: std r25, 664(r1) # 8-byte Folded Spill
-; BE-NEXT: std r26, 672(r1) # 8-byte Folded Spill
-; BE-NEXT: std r27, 680(r1) # 8-byte Folded Spill
-; BE-NEXT: std r28, 688(r1) # 8-byte Folded Spill
-; BE-NEXT: std r29, 696(r1) # 8-byte Folded Spill
-; BE-NEXT: std r30, 704(r1) # 8-byte Folded Spill
-; BE-NEXT: std r31, 712(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f14, 720(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f15, 728(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f16, 736(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f17, 744(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f18, 752(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f19, 760(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f21, 776(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f22, 784(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f23, 792(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f24, 800(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f25, 808(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f26, 816(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f27, 824(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f28, 832(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f29, 840(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f30, 848(r1) # 8-byte Folded Spill
-; BE-NEXT: stfd f31, 856(r1) # 8-byte Folded Spill
-; BE-NEXT: fmr f31, f13
+; BE-NEXT: stdu r1, -624(r1)
+; BE-NEXT: std r0, 640(r1)
+; BE-NEXT: std r30, 608(r1) # 8-byte Folded Spill
; BE-NEXT: mr r30, r3
-; BE-NEXT: fmr f29, f12
-; BE-NEXT: fmr f30, f11
-; BE-NEXT: fmr f28, f10
-; BE-NEXT: fmr f27, f9
-; BE-NEXT: fmr f26, f8
-; BE-NEXT: fmr f25, f7
-; BE-NEXT: fmr f24, f6
-; BE-NEXT: fmr f23, f5
-; BE-NEXT: fmr f22, f4
-; BE-NEXT: fmr f21, f3
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f20
-; BE-NEXT: std r3, 304(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f22
-; BE-NEXT: std r3, 296(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f21
-; BE-NEXT: std r3, 280(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f24
-; BE-NEXT: std r3, 264(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f23
-; BE-NEXT: std r3, 248(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f26
-; BE-NEXT: std r3, 232(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f25
+; BE-NEXT: lhz r3, 926(r1)
+; BE-NEXT: std r14, 480(r1) # 8-byte Folded Spill
+; BE-NEXT: lhz r14, 822(r1)
+; BE-NEXT: std r15, 488(r1) # 8-byte Folded Spill
+; BE-NEXT: std r19, 520(r1) # 8-byte Folded Spill
; BE-NEXT: std r3, 216(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f28
+; BE-NEXT: lhz r3, 934(r1)
+; BE-NEXT: lhz r15, 814(r1)
+; BE-NEXT: lhz r19, 742(r1)
+; BE-NEXT: std r22, 544(r1) # 8-byte Folded Spill
+; BE-NEXT: std r23, 552(r1) # 8-byte Folded Spill
+; BE-NEXT: std r25, 568(r1) # 8-byte Folded Spill
+; BE-NEXT: std r26, 576(r1) # 8-byte Folded Spill
+; BE-NEXT: std r3, 208(r1) # 8-byte Folded Spill
+; BE-NEXT: lhz r3, 910(r1)
+; BE-NEXT: lhz r26, 766(r1)
+; BE-NEXT: lhz r25, 774(r1)
+; BE-NEXT: std r27, 584(r1) # 8-byte Folded Spill
+; BE-NEXT: std r28, 592(r1) # 8-byte Folded Spill
+; BE-NEXT: std r29, 600(r1) # 8-byte Folded Spill
+; BE-NEXT: std r31, 616(r1) # 8-byte Folded Spill
; BE-NEXT: std r3, 200(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f27
+; BE-NEXT: lhz r3, 918(r1)
+; BE-NEXT: lhz r31, 798(r1)
+; BE-NEXT: lhz r29, 806(r1)
+; BE-NEXT: lhz r28, 782(r1)
+; BE-NEXT: lhz r27, 790(r1)
+; BE-NEXT: lhz r23, 750(r1)
+; BE-NEXT: lhz r22, 758(r1)
+; BE-NEXT: std r16, 496(r1) # 8-byte Folded Spill
+; BE-NEXT: std r17, 504(r1) # 8-byte Folded Spill
+; BE-NEXT: std r3, 192(r1) # 8-byte Folded Spill
+; BE-NEXT: lhz r3, 894(r1)
+; BE-NEXT: mr r17, r7
+; BE-NEXT: mr r16, r4
+; BE-NEXT: std r18, 512(r1) # 8-byte Folded Spill
+; BE-NEXT: std r20, 528(r1) # 8-byte Folded Spill
+; BE-NEXT: std r21, 536(r1) # 8-byte Folded Spill
+; BE-NEXT: std r24, 560(r1) # 8-byte Folded Spill
; BE-NEXT: std r3, 184(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f29
+; BE-NEXT: lhz r3, 902(r1)
+; BE-NEXT: mr r24, r10
+; BE-NEXT: mr r20, r9
+; BE-NEXT: mr r21, r8
+; BE-NEXT: mr r18, r6
+; BE-NEXT: std r3, 176(r1) # 8-byte Folded Spill
+; BE-NEXT: lhz r3, 878(r1)
; BE-NEXT: std r3, 168(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f30
+; BE-NEXT: lhz r3, 886(r1)
+; BE-NEXT: std r3, 160(r1) # 8-byte Folded Spill
+; BE-NEXT: lhz r3, 862(r1)
; BE-NEXT: std r3, 152(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1028(r1)
+; BE-NEXT: lhz r3, 870(r1)
+; BE-NEXT: std r3, 144(r1) # 8-byte Folded Spill
+; BE-NEXT: lhz r3, 846(r1)
; BE-NEXT: std r3, 136(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
+; BE-NEXT: lhz r3, 854(r1)
+; BE-NEXT: std r3, 128(r1) # 8-byte Folded Spill
+; BE-NEXT: lhz r3, 830(r1)
; BE-NEXT: std r3, 120(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
-; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1044(r1)
+; BE-NEXT: lhz r3, 838(r1)
; BE-NEXT: std r3, 112(r1) # 8-byte Folded Spill
-; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: clrldi r3, r5, 48
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1036(r1)
-; BE-NEXT: mr r15, r3
-; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1060(r1)
-; BE-NEXT: mr r14, r3
-; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: std r3, 424(r1)
+; BE-NEXT: clrldi r3, r16, 48
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1052(r1)
-; BE-NEXT: mr r31, r3
-; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1076(r1)
-; BE-NEXT: mr r29, r3
-; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: std r3, 416(r1)
+; BE-NEXT: clrldi r3, r17, 48
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1068(r1)
-; BE-NEXT: mr r28, r3
-; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1092(r1)
-; BE-NEXT: mr r27, r3
-; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: std r3, 440(r1)
+; BE-NEXT: clrldi r3, r18, 48
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1084(r1)
-; BE-NEXT: mr r26, r3
-; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1108(r1)
-; BE-NEXT: mr r25, r3
-; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: std r3, 432(r1)
+; BE-NEXT: clrldi r3, r20, 48
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1100(r1)
-; BE-NEXT: mr r24, r3
-; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1124(r1)
-; BE-NEXT: mr r23, r3
-; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: std r3, 456(r1)
+; BE-NEXT: clrldi r3, r21, 48
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1116(r1)
-; BE-NEXT: mr r22, r3
-; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1140(r1)
-; BE-NEXT: mr r21, r3
-; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: std r3, 448(r1)
+; BE-NEXT: mr r3, r19
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1132(r1)
-; BE-NEXT: mr r20, r3
-; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1156(r1)
-; BE-NEXT: mr r19, r3
-; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: std r3, 472(r1)
+; BE-NEXT: clrldi r3, r24, 48
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1148(r1)
-; BE-NEXT: mr r18, r3
-; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1172(r1)
-; BE-NEXT: mr r17, r3
-; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: std r3, 464(r1)
+; BE-NEXT: mr r3, r22
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 1164(r1)
-; BE-NEXT: mr r16, r3
-; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: std r3, 232(r1)
+; BE-NEXT: mr r3, r23
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r16, 48
-; BE-NEXT: stfs f1, 316(r1) # 4-byte Folded Spill
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r17, 48
-; BE-NEXT: stfs f1, 312(r1) # 4-byte Folded Spill
+; BE-NEXT: std r3, 224(r1)
+; BE-NEXT: mr r3, r25
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r18, 48
-; BE-NEXT: stfs f1, 292(r1) # 4-byte Folded Spill
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r19, 48
-; BE-NEXT: stfs f1, 276(r1) # 4-byte Folded Spill
+; BE-NEXT: std r3, 248(r1)
+; BE-NEXT: mr r3, r26
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r20, 48
-; BE-NEXT: stfs f1, 260(r1) # 4-byte Folded Spill
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r21, 48
-; BE-NEXT: stfs f1, 244(r1) # 4-byte Folded Spill
+; BE-NEXT: std r3, 240(r1)
+; BE-NEXT: mr r3, r27
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r22, 48
-; BE-NEXT: stfs f1, 228(r1) # 4-byte Folded Spill
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r23, 48
-; BE-NEXT: stfs f1, 212(r1) # 4-byte Folded Spill
+; BE-NEXT: std r3, 264(r1)
+; BE-NEXT: mr r3, r28
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r24, 48
-; BE-NEXT: stfs f1, 196(r1) # 4-byte Folded Spill
+; BE-NEXT: bl lrintf
+; BE-NEXT: nop
+; BE-NEXT: std r3, 256(r1)
+; BE-NEXT: mr r3, r29
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r25, 48
-; BE-NEXT: stfs f1, 180(r1) # 4-byte Folded Spill
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r26, 48
-; BE-NEXT: stfs f1, 164(r1) # 4-byte Folded Spill
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r27, 48
-; BE-NEXT: stfs f1, 148(r1) # 4-byte Folded Spill
+; BE-NEXT: std r3, 280(r1)
+; BE-NEXT: mr r3, r31
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r28, 48
-; BE-NEXT: stfs f1, 132(r1) # 4-byte Folded Spill
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r29, 48
-; BE-NEXT: fmr f18, f1
+; BE-NEXT: std r3, 272(r1)
+; BE-NEXT: mr r3, r14
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r31, 48
-; BE-NEXT: fmr f17, f1
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r14, 48
-; BE-NEXT: fmr f16, f1
+; BE-NEXT: std r3, 296(r1)
+; BE-NEXT: mr r3, r15
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: clrldi r3, r15, 48
-; BE-NEXT: fmr f15, f1
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
+; BE-NEXT: std r3, 288(r1)
; BE-NEXT: ld r3, 112(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f14, f1
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: ld r3, 120(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f31, f1
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: ld r3, 136(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f30, f1
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: ld r3, 152(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f29, f1
-; BE-NEXT: clrldi r3, r3, 48
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: ld r3, 168(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f28, f1
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: ld r3, 184(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f27, f1
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: ld r3, 200(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f26, f1
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: ld r3, 216(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f25, f1
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: ld r3, 232(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f24, f1
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: ld r3, 248(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f23, f1
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
-; BE-NEXT: nop
-; BE-NEXT: ld r3, 264(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f22, f1
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: ld r3, 280(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f21, f1
-; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: std r3, 312(r1)
+; BE-NEXT: ld r3, 120(r1) # 8-byte Folded Reload
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: ld r3, 296(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f20, f1
-; BE-NEXT: clrldi r3, r3, 48
-; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: ld r3, 304(r1) # 8-byte Folded Reload
-; BE-NEXT: fmr f19, f1
-; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: std r3, 304(r1)
+; BE-NEXT: ld r3, 128(r1) # 8-byte Folded Reload
; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f19
; BE-NEXT: std r3, 328(r1)
-; BE-NEXT: bl lrintf
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f20
-; BE-NEXT: std r3, 320(r1)
-; BE-NEXT: bl lrintf
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f21
-; BE-NEXT: std r3, 344(r1)
-; BE-NEXT: bl lrintf
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f22
-; BE-NEXT: std r3, 336(r1)
-; BE-NEXT: bl lrintf
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f23
-; BE-NEXT: std r3, 360(r1)
-; BE-NEXT: bl lrintf
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f24
-; BE-NEXT: std r3, 352(r1)
-; BE-NEXT: bl lrintf
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f25
-; BE-NEXT: std r3, 376(r1)
-; BE-NEXT: bl lrintf
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f26
-; BE-NEXT: std r3, 368(r1)
-; BE-NEXT: bl lrintf
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f27
-; BE-NEXT: std r3, 392(r1)
-; BE-NEXT: bl lrintf
-; BE-NEXT: nop
-; BE-NEXT: fmr f1, f28
-; BE-NEXT: std r3, 384(r1)
-; BE-NEXT: bl lrintf
+; BE-NEXT: ld r3, 136(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f29
-; BE-NEXT: std r3, 408(r1)
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f30
-; BE-NEXT: std r3, 400(r1)
-; BE-NEXT: bl lrintf
+; BE-NEXT: std r3, 320(r1)
+; BE-NEXT: ld r3, 144(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f31
-; BE-NEXT: std r3, 424(r1)
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f14
-; BE-NEXT: std r3, 416(r1)
-; BE-NEXT: bl lrintf
+; BE-NEXT: std r3, 344(r1)
+; BE-NEXT: ld r3, 152(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f15
-; BE-NEXT: std r3, 440(r1)
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f16
-; BE-NEXT: std r3, 432(r1)
-; BE-NEXT: bl lrintf
+; BE-NEXT: std r3, 336(r1)
+; BE-NEXT: ld r3, 160(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f17
-; BE-NEXT: std r3, 456(r1)
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: fmr f1, f18
-; BE-NEXT: std r3, 448(r1)
-; BE-NEXT: bl lrintf
+; BE-NEXT: std r3, 360(r1)
+; BE-NEXT: ld r3, 168(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 132(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 472(r1)
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 148(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 464(r1)
-; BE-NEXT: bl lrintf
+; BE-NEXT: std r3, 352(r1)
+; BE-NEXT: ld r3, 176(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 164(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 488(r1)
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 180(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 480(r1)
-; BE-NEXT: bl lrintf
+; BE-NEXT: std r3, 376(r1)
+; BE-NEXT: ld r3, 184(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 196(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 504(r1)
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 212(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 496(r1)
-; BE-NEXT: bl lrintf
+; BE-NEXT: std r3, 368(r1)
+; BE-NEXT: ld r3, 192(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 228(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 520(r1)
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 244(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 512(r1)
-; BE-NEXT: bl lrintf
+; BE-NEXT: std r3, 392(r1)
+; BE-NEXT: ld r3, 200(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 260(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 536(r1)
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 276(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 528(r1)
-; BE-NEXT: bl lrintf
+; BE-NEXT: std r3, 384(r1)
+; BE-NEXT: ld r3, 208(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 292(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 552(r1)
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 312(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 544(r1)
-; BE-NEXT: bl lrintf
+; BE-NEXT: std r3, 408(r1)
+; BE-NEXT: ld r3, 216(r1) # 8-byte Folded Reload
+; BE-NEXT: bl __extendhfsf2
; BE-NEXT: nop
-; BE-NEXT: lfs f1, 316(r1) # 4-byte Folded Reload
-; BE-NEXT: std r3, 568(r1)
; BE-NEXT: bl lrintf
; BE-NEXT: nop
-; BE-NEXT: std r3, 560(r1)
-; BE-NEXT: addi r3, r1, 320
+; BE-NEXT: std r3, 400(r1)
+; BE-NEXT: addi r3, r1, 416
; BE-NEXT: lxvd2x vs0, 0, r3
-; BE-NEXT: addi r3, r1, 336
+; BE-NEXT: addi r3, r1, 432
; BE-NEXT: lxvd2x vs1, 0, r3
-; BE-NEXT: addi r3, r1, 352
+; BE-NEXT: addi r3, r1, 448
; BE-NEXT: lxvd2x vs2, 0, r3
-; BE-NEXT: addi r3, r1, 368
+; BE-NEXT: addi r3, r1, 464
; BE-NEXT: lxvd2x vs3, 0, r3
-; BE-NEXT: addi r3, r1, 384
+; BE-NEXT: addi r3, r1, 224
; BE-NEXT: lxvd2x vs4, 0, r3
-; BE-NEXT: addi r3, r1, 400
+; BE-NEXT: addi r3, r1, 240
; BE-NEXT: lxvd2x vs5, 0, r3
-; BE-NEXT: addi r3, r1, 416
+; BE-NEXT: addi r3, r1, 256
; BE-NEXT: lxvd2x vs6, 0, r3
-; BE-NEXT: addi r3, r1, 432
+; BE-NEXT: addi r3, r1, 272
; BE-NEXT: lxvd2x vs7, 0, r3
-; BE-NEXT: addi r3, r1, 448
+; BE-NEXT: addi r3, r1, 288
; BE-NEXT: lxvd2x vs8, 0, r3
-; BE-NEXT: addi r3, r1, 464
+; BE-NEXT: addi r3, r1, 304
; BE-NEXT: lxvd2x vs9, 0, r3
-; BE-NEXT: addi r3, r1, 480
+; BE-NEXT: addi r3, r1, 320
; BE-NEXT: lxvd2x vs10, 0, r3
-; BE-NEXT: addi r3, r1, 496
+; BE-NEXT: addi r3, r1, 336
; BE-NEXT: lxvd2x vs11, 0, r3
-; BE-NEXT: addi r3, r1, 512
+; BE-NEXT: addi r3, r1, 352
; BE-NEXT: lxvd2x vs12, 0, r3
-; BE-NEXT: addi r3, r1, 528
+; BE-NEXT: addi r3, r1, 368
; BE-NEXT: lxvd2x vs13, 0, r3
-; BE-NEXT: addi r3, r1, 544
+; BE-NEXT: addi r3, r1, 384
; BE-NEXT: lxvd2x v2, 0, r3
-; BE-NEXT: addi r3, r1, 560
+; BE-NEXT: addi r3, r1, 400
; BE-NEXT: lxvd2x v3, 0, r3
; BE-NEXT: li r3, 240
; BE-NEXT: stxvd2x v3, r30, r3
@@ -2194,43 +1542,25 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind {
; BE-NEXT: li r3, 16
; BE-NEXT: stxvd2x vs1, r30, r3
; BE-NEXT: stxvd2x vs0, 0, r30
-; BE-NEXT: lfd f31, 856(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f30, 848(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f29, 840(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f28, 832(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f27, 824(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f26, 816(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f25, 808(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f24, 800(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f23, 792(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f22, 784(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f21, 776(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f20, 768(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f19, 760(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f18, 752(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f17, 744(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f16, 736(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f15, 728(r1) # 8-byte Folded Reload
-; BE-NEXT: lfd f14, 720(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r31, 712(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r30, 704(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r29, 696(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r28, 688(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r27, 680(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r26, 672(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r25, 664(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r24, 656(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r23, 648(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r22, 640(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r21, 632(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r20, 624(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r19, 616(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r18, 608(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r17, 600(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r16, 592(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r15, 584(r1) # 8-byte Folded Reload
-; BE-NEXT: ld r14, 576(r1) # 8-byte Folded Reload
-; BE-NEXT: addi r1, r1, 864
+; BE-NEXT: ld r31, 616(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r30, 608(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r29, 600(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r28, 592(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r27, 584(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r26, 576(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r25, 568(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r24, 560(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r23, 552(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r22, 544(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r21, 536(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r20, 528(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r19, 520(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r18, 512(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r17, 504(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r16, 496(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r15, 488(r1) # 8-byte Folded Reload
+; BE-NEXT: ld r14, 480(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 624
; BE-NEXT: ld r0, 16(r1)
; BE-NEXT: mtlr r0
; BE-NEXT: blr
@@ -2238,508 +1568,334 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind {
; CHECK-LABEL: lrint_v32i64_v32f16:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stdu r1, -688(r1)
-; CHECK-NEXT: li r4, 208
-; CHECK-NEXT: std r0, 704(r1)
-; CHECK-NEXT: std r14, 400(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r15, 408(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r16, 416(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r17, 424(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r18, 432(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r19, 440(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 224
-; CHECK-NEXT: std r20, 448(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r21, 456(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r22, 464(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r23, 472(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r24, 480(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r25, 488(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 240
-; CHECK-NEXT: std r26, 496(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r27, 504(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r28, 512(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r29, 520(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r30, 528(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stdu r1, -576(r1)
+; CHECK-NEXT: std r0, 592(r1)
+; CHECK-NEXT: std r30, 560(r1) # 8-byte Folded Spill
; CHECK-NEXT: mr r30, r3
-; CHECK-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 256
-; CHECK-NEXT: std r31, 536(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f14, 544(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f15, 552(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f16, 560(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f17, 568(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f18, 576(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 272
-; CHECK-NEXT: stfd f19, 584(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f20, 592(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f20, f2
-; CHECK-NEXT: stfd f21, 600(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f21, f3
-; CHECK-NEXT: stfd f22, 608(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f22, f4
-; CHECK-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 288
-; CHECK-NEXT: stfd f23, 616(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f23, f5
-; CHECK-NEXT: stfd f24, 624(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f24, f6
-; CHECK-NEXT: stfd f25, 632(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f25, f7
-; CHECK-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 304
-; CHECK-NEXT: stfd f26, 640(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f26, f8
-; CHECK-NEXT: stfd f27, 648(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f27, f9
-; CHECK-NEXT: stfd f28, 656(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f28, f10
-; CHECK-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 320
-; CHECK-NEXT: stfd f29, 664(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f29, f11
-; CHECK-NEXT: stfd f30, 672(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f30, f12
-; CHECK-NEXT: stfd f31, 680(r1) # 8-byte Folded Spill
-; CHECK-NEXT: fmr f31, f13
-; CHECK-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 336
-; CHECK-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 352
-; CHECK-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 368
-; CHECK-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: li r4, 384
-; CHECK-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f20
+; CHECK-NEXT: lhz r3, 864(r1)
+; CHECK-NEXT: li r11, 240
+; CHECK-NEXT: std r14, 432(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r19, 472(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r14, 744(r1)
+; CHECK-NEXT: stxvd2x v20, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 256
+; CHECK-NEXT: std r22, 496(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r22, 680(r1)
+; CHECK-NEXT: std r3, 216(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r3, 856(r1)
+; CHECK-NEXT: lhz r19, 672(r1)
+; CHECK-NEXT: stxvd2x v21, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 272
+; CHECK-NEXT: std r23, 504(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r23, 688(r1)
+; CHECK-NEXT: stxvd2x v22, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: std r3, 184(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r3, 848(r1)
+; CHECK-NEXT: li r11, 288
+; CHECK-NEXT: std r25, 520(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r25, 696(r1)
+; CHECK-NEXT: stxvd2x v23, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 304
+; CHECK-NEXT: std r26, 528(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r27, 536(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r28, 544(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r28, 720(r1)
+; CHECK-NEXT: lhz r27, 712(r1)
+; CHECK-NEXT: lhz r26, 704(r1)
+; CHECK-NEXT: stxvd2x v24, r1, r11 # 16-byte Folded Spill
; CHECK-NEXT: std r3, 176(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f21
-; CHECK-NEXT: std r3, 160(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f22
+; CHECK-NEXT: lhz r3, 840(r1)
+; CHECK-NEXT: li r11, 320
+; CHECK-NEXT: std r29, 552(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r29, 728(r1)
+; CHECK-NEXT: stxvd2x v25, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: std r3, 152(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r3, 832(r1)
+; CHECK-NEXT: li r11, 336
+; CHECK-NEXT: std r31, 568(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lhz r31, 736(r1)
+; CHECK-NEXT: stxvd2x v26, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 352
+; CHECK-NEXT: std r15, 440(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r16, 448(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r17, 456(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r17, r6
+; CHECK-NEXT: mr r16, r5
+; CHECK-NEXT: stxvd2x v27, r1, r11 # 16-byte Folded Spill
; CHECK-NEXT: std r3, 144(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f23
-; CHECK-NEXT: std r3, 128(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f24
+; CHECK-NEXT: lhz r3, 824(r1)
+; CHECK-NEXT: li r11, 368
+; CHECK-NEXT: std r18, 464(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r20, 480(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r20, r8
+; CHECK-NEXT: mr r18, r7
+; CHECK-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill
; CHECK-NEXT: std r3, 120(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f25
+; CHECK-NEXT: lhz r3, 816(r1)
+; CHECK-NEXT: li r11, 384
+; CHECK-NEXT: std r21, 488(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r24, 512(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r24, r10
+; CHECK-NEXT: mr r21, r9
+; CHECK-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 400
; CHECK-NEXT: std r3, 112(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f26
+; CHECK-NEXT: lhz r3, 808(r1)
+; CHECK-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT: li r11, 416
; CHECK-NEXT: std r3, 104(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f27
+; CHECK-NEXT: lhz r3, 800(r1)
+; CHECK-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill
; CHECK-NEXT: std r3, 96(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f28
+; CHECK-NEXT: lhz r3, 792(r1)
; CHECK-NEXT: std r3, 88(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f29
+; CHECK-NEXT: lhz r3, 784(r1)
; CHECK-NEXT: std r3, 80(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f30
+; CHECK-NEXT: lhz r3, 776(r1)
; CHECK-NEXT: std r3, 72(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
+; CHECK-NEXT: lhz r3, 768(r1)
; CHECK-NEXT: std r3, 64(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 832(r1)
+; CHECK-NEXT: lhz r3, 760(r1)
; CHECK-NEXT: std r3, 56(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 840(r1)
+; CHECK-NEXT: lhz r3, 752(r1)
; CHECK-NEXT: std r3, 48(r1) # 8-byte Folded Spill
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 848(r1)
-; CHECK-NEXT: mr r15, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 856(r1)
-; CHECK-NEXT: mr r14, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 864(r1)
-; CHECK-NEXT: mr r31, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: clrldi r3, r4, 48
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 872(r1)
-; CHECK-NEXT: mr r29, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 880(r1)
-; CHECK-NEXT: mr r28, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: mr r15, r3
+; CHECK-NEXT: clrldi r3, r16, 48
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 888(r1)
-; CHECK-NEXT: mr r27, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: mtvsrd v31, r15
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 896(r1)
-; CHECK-NEXT: mr r26, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 224
+; CHECK-NEXT: xxmrghd vs0, vs0, v31
+; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: clrldi r3, r17, 48
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 904(r1)
-; CHECK-NEXT: mr r25, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 912(r1)
-; CHECK-NEXT: mr r24, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: mr r17, r3
+; CHECK-NEXT: clrldi r3, r18, 48
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 920(r1)
-; CHECK-NEXT: mr r23, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: mtvsrd v31, r17
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 928(r1)
-; CHECK-NEXT: mr r22, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 192
+; CHECK-NEXT: xxmrghd vs0, vs0, v31
+; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: clrldi r3, r20, 48
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 936(r1)
-; CHECK-NEXT: mr r21, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 944(r1)
; CHECK-NEXT: mr r20, r3
-; CHECK-NEXT: bl __truncsfhf2
+; CHECK-NEXT: clrldi r3, r21, 48
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 952(r1)
-; CHECK-NEXT: mr r19, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 960(r1)
-; CHECK-NEXT: mr r18, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 968(r1)
-; CHECK-NEXT: mr r17, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfs f1, 976(r1)
-; CHECK-NEXT: mr r16, r3
-; CHECK-NEXT: bl __truncsfhf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r3, 48
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: li r3, 204
-; CHECK-NEXT: stxsspx f1, r1, r3 # 4-byte Folded Spill
-; CHECK-NEXT: clrldi r3, r16, 48
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: li r3, 200
-; CHECK-NEXT: stxsspx f1, r1, r3 # 4-byte Folded Spill
-; CHECK-NEXT: clrldi r3, r17, 48
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r18, 48
-; CHECK-NEXT: fmr f29, f1
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r19, 48
-; CHECK-NEXT: fmr f28, f1
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r20, 48
-; CHECK-NEXT: fmr f27, f1
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r21, 48
-; CHECK-NEXT: fmr f26, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: mtvsrd v31, r20
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r22, 48
-; CHECK-NEXT: fmr f25, f1
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 160
+; CHECK-NEXT: xxmrghd vs0, vs0, v31
+; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: mr r3, r19
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r23, 48
-; CHECK-NEXT: fmr f24, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
+; CHECK-NEXT: mr r21, r3
; CHECK-NEXT: clrldi r3, r24, 48
-; CHECK-NEXT: fmr f23, f1
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r25, 48
-; CHECK-NEXT: fmr f22, f1
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r26, 48
-; CHECK-NEXT: fmr f21, f1
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r27, 48
-; CHECK-NEXT: fmr f20, f1
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r28, 48
-; CHECK-NEXT: fmr f19, f1
-; CHECK-NEXT: bl __extendhfsf2
-; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r29, 48
-; CHECK-NEXT: fmr f18, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: mtvsrd v31, r21
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r31, 48
-; CHECK-NEXT: fmr f17, f1
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: li r3, 128
+; CHECK-NEXT: xxmrghd vs0, v31, vs0
+; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT: mr r3, r22
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r14, 48
-; CHECK-NEXT: fmr f16, f1
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: clrldi r3, r15, 48
-; CHECK-NEXT: fmr f15, f1
+; CHECK-NEXT: mr r24, r3
+; CHECK-NEXT: mr r3, r23
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 48(r1) # 8-byte Folded Reload
-; CHECK-NEXT: fmr f14, f1
-; CHECK-NEXT: clrldi r3, r3, 48
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: mtvsrd v31, r24
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 56(r1) # 8-byte Folded Reload
-; CHECK-NEXT: fmr f30, f1
-; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: mr r3, r25
+; CHECK-NEXT: xxmrghd v27, vs0, v31
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 64(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v30, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 72(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v29, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: mr r25, r3
+; CHECK-NEXT: mr r3, r26
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 80(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v28, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: mtvsrd v31, r25
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 88(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v27, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: mr r3, r27
+; CHECK-NEXT: xxmrghd v26, vs0, v31
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 96(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v26, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 104(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v25, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: mr r27, r3
+; CHECK-NEXT: mr r3, r28
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 112(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v24, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: mtvsrd v31, r27
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 120(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v23, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: mr r3, r29
+; CHECK-NEXT: xxmrghd v25, vs0, v31
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 128(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v22, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 144(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v21, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: mr r3, r31
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 160(r1) # 8-byte Folded Reload
-; CHECK-NEXT: xxlor v20, f1, f1
-; CHECK-NEXT: clrldi r3, r3, 48
-; CHECK-NEXT: bl __extendhfsf2
+; CHECK-NEXT: mtvsrd v31, r29
+; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r3, 176(r1) # 8-byte Folded Reload
-; CHECK-NEXT: fmr f31, f1
-; CHECK-NEXT: clrldi r3, r3, 48
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: mr r3, r14
+; CHECK-NEXT: xxmrghd v24, vs0, v31
; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f31
-; CHECK-NEXT: mtvsrd v31, r3
-; CHECK-NEXT: bl lrintf
-; CHECK-NEXT: nop
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: li r3, 176
-; CHECK-NEXT: xxlor f1, v20, v20
-; CHECK-NEXT: xxmrghd vs0, vs0, v31
-; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: ld r3, 48(r1) # 8-byte Folded Reload
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: xxlor f1, v21, v21
-; CHECK-NEXT: mtvsrd v31, r3
+; CHECK-NEXT: mtvsrd v31, r29
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: li r3, 160
-; CHECK-NEXT: xxlor f1, v22, v22
-; CHECK-NEXT: xxmrghd vs0, vs0, v31
-; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: ld r3, 56(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghd v23, vs0, v31
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: xxlor f1, v23, v23
-; CHECK-NEXT: mtvsrd v31, r3
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: li r3, 144
-; CHECK-NEXT: xxlor f1, v24, v24
-; CHECK-NEXT: xxmrghd vs0, vs0, v31
-; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: ld r3, 64(r1) # 8-byte Folded Reload
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: xxlor f1, v25, v25
-; CHECK-NEXT: mtvsrd v31, r3
+; CHECK-NEXT: mtvsrd v31, r29
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: li r3, 128
-; CHECK-NEXT: xxlor f1, v26, v26
-; CHECK-NEXT: xxmrghd vs0, vs0, v31
-; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: ld r3, 72(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghd v22, vs0, v31
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: xxlor f1, v27, v27
-; CHECK-NEXT: mtvsrd v31, r3
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxlor f1, v28, v28
-; CHECK-NEXT: xxmrghd v27, vs0, v31
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: ld r3, 80(r1) # 8-byte Folded Reload
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: xxlor f1, v29, v29
-; CHECK-NEXT: mtvsrd v31, r3
+; CHECK-NEXT: mtvsrd v31, r29
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxlor f1, v30, v30
-; CHECK-NEXT: xxmrghd v29, vs0, v31
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: ld r3, 88(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghd v21, vs0, v31
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f30
-; CHECK-NEXT: mtvsrd v31, r3
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f14
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v31, vs0, v31
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: ld r3, 96(r1) # 8-byte Folded Reload
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f15
-; CHECK-NEXT: mtvsrd v30, r3
+; CHECK-NEXT: mtvsrd v31, r29
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f16
; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v30, vs0, v30
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: ld r3, 104(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghd v20, vs0, v31
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f17
-; CHECK-NEXT: mtvsrd v28, r3
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f18
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v28, vs0, v28
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: ld r3, 112(r1) # 8-byte Folded Reload
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f19
-; CHECK-NEXT: mtvsrd v26, r3
+; CHECK-NEXT: mtvsrd v31, r29
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f20
; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v26, vs0, v26
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: ld r3, 120(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghd v31, vs0, v31
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f21
-; CHECK-NEXT: mtvsrd v24, r3
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f22
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v24, vs0, v24
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: ld r3, 144(r1) # 8-byte Folded Reload
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f23
-; CHECK-NEXT: mtvsrd v22, r3
+; CHECK-NEXT: mtvsrd v30, r29
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f24
; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v22, vs0, v22
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: ld r3, 152(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghd v30, vs0, v30
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f25
-; CHECK-NEXT: mtvsrd v20, r3
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f26
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v20, vs0, v20
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: ld r3, 176(r1) # 8-byte Folded Reload
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f27
-; CHECK-NEXT: mtvsrd v21, r3
+; CHECK-NEXT: mtvsrd v29, r29
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f28
; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: xxmrghd v21, vs0, v21
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: ld r3, 184(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghd v29, vs0, v29
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: fmr f1, f29
-; CHECK-NEXT: mtvsrd v23, r3
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
-; CHECK-NEXT: mtfprd f0, r3
-; CHECK-NEXT: li r3, 200
-; CHECK-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload
-; CHECK-NEXT: xxmrghd v23, vs0, v23
-; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: mr r29, r3
+; CHECK-NEXT: ld r3, 216(r1) # 8-byte Folded Reload
+; CHECK-NEXT: bl __extendhfsf2
; CHECK-NEXT: nop
-; CHECK-NEXT: mtvsrd v25, r3
-; CHECK-NEXT: li r3, 204
-; CHECK-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload
+; CHECK-NEXT: mtvsrd v28, r29
; CHECK-NEXT: bl lrintf
; CHECK-NEXT: nop
; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: li r3, 240
-; CHECK-NEXT: xxswapd vs1, v23
+; CHECK-NEXT: xxswapd vs1, v29
; CHECK-NEXT: li r4, 128
-; CHECK-NEXT: xxswapd vs2, v21
-; CHECK-NEXT: xxswapd vs3, v31
-; CHECK-NEXT: xxmrghd v2, vs0, v25
+; CHECK-NEXT: xxswapd vs2, v30
+; CHECK-NEXT: xxswapd vs3, v25
+; CHECK-NEXT: xxmrghd v2, vs0, v28
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: stxvd2x vs0, r30, r3
; CHECK-NEXT: li r3, 224
@@ -2747,35 +1903,35 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind {
; CHECK-NEXT: li r3, 208
; CHECK-NEXT: stxvd2x vs2, r30, r3
; CHECK-NEXT: li r3, 192
-; CHECK-NEXT: xxswapd vs0, v20
+; CHECK-NEXT: xxswapd vs0, v31
; CHECK-NEXT: stxvd2x vs0, r30, r3
; CHECK-NEXT: li r3, 176
-; CHECK-NEXT: xxswapd vs1, v22
+; CHECK-NEXT: xxswapd vs1, v20
; CHECK-NEXT: stxvd2x vs1, r30, r3
; CHECK-NEXT: li r3, 160
-; CHECK-NEXT: xxswapd vs2, v28
-; CHECK-NEXT: xxswapd vs0, v24
+; CHECK-NEXT: xxswapd vs2, v23
+; CHECK-NEXT: xxswapd vs0, v21
; CHECK-NEXT: stxvd2x vs0, r30, r3
; CHECK-NEXT: li r3, 144
-; CHECK-NEXT: xxswapd vs1, v26
+; CHECK-NEXT: xxswapd vs1, v22
; CHECK-NEXT: stxvd2x vs1, r30, r3
; CHECK-NEXT: li r3, 128
; CHECK-NEXT: stxvd2x vs2, r30, r3
; CHECK-NEXT: li r3, 112
-; CHECK-NEXT: xxswapd vs0, v30
+; CHECK-NEXT: xxswapd vs0, v24
; CHECK-NEXT: stxvd2x vs0, r30, r3
; CHECK-NEXT: li r3, 96
; CHECK-NEXT: stxvd2x vs3, r30, r3
; CHECK-NEXT: li r3, 80
; CHECK-NEXT: lxvd2x vs2, r1, r4 # 16-byte Folded Reload
-; CHECK-NEXT: li r4, 144
-; CHECK-NEXT: xxswapd vs1, v29
+; CHECK-NEXT: li r4, 160
+; CHECK-NEXT: xxswapd vs1, v26
; CHECK-NEXT: stxvd2x vs1, r30, r3
; CHECK-NEXT: li r3, 64
; CHECK-NEXT: lxvd2x vs1, r1, r4 # 16-byte Folded Reload
-; CHECK-NEXT: li r4, 160
+; CHECK-NEXT: li r4, 192
; CHECK-NEXT: lxvd2x vs3, r1, r4 # 16-byte Folded Reload
-; CHECK-NEXT: li r4, 176
+; CHECK-NEXT: li r4, 224
; CHECK-NEXT: lxvd2x vs4, r1, r4 # 16-byte Folded Reload
; CHECK-NEXT: xxswapd vs0, v27
; CHECK-NEXT: stxvd2x vs0, r30, r3
@@ -2788,69 +1944,51 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind {
; CHECK-NEXT: li r3, 16
; CHECK-NEXT: xxswapd vs3, vs3
; CHECK-NEXT: stxvd2x vs3, r30, r3
-; CHECK-NEXT: li r3, 384
+; CHECK-NEXT: li r3, 416
; CHECK-NEXT: xxswapd vs4, vs4
; CHECK-NEXT: stxvd2x vs4, 0, r30
; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 368
-; CHECK-NEXT: lfd f31, 680(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f30, 672(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f29, 664(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f28, 656(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f27, 648(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f26, 640(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f25, 632(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f24, 624(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f23, 616(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f22, 608(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f21, 600(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f20, 592(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f19, 584(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f18, 576(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f17, 568(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f16, 560(r1) # 8-byte Folded Reload
+; CHECK-NEXT: li r3, 400
+; CHECK-NEXT: ld r31, 568(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r30, 560(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r29, 552(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r28, 544(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r27, 536(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r26, 528(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r25, 520(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r24, 512(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r23, 504(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r22, 496(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r21, 488(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r20, 480(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r19, 472(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r18, 464(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r17, 456(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r16, 448(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 352
-; CHECK-NEXT: lfd f15, 552(r1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd f14, 544(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r31, 536(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r30, 528(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r29, 520(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r28, 512(r1) # 8-byte Folded Reload
+; CHECK-NEXT: li r3, 384
+; CHECK-NEXT: ld r15, 440(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r14, 432(r1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 336
-; CHECK-NEXT: ld r27, 504(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r26, 496(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r25, 488(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r24, 480(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r23, 472(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r22, 464(r1) # 8-byte Folded Reload
+; CHECK-NEXT: li r3, 368
; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 320
-; CHECK-NEXT: ld r21, 456(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r20, 448(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r19, 440(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r18, 432(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r17, 424(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r16, 416(r1) # 8-byte Folded Reload
+; CHECK-NEXT: li r3, 352
; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 304
-; CHECK-NEXT: ld r15, 408(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r14, 400(r1) # 8-byte Folded Reload
+; CHECK-NEXT: li r3, 336
; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 288
+; CHECK-NEXT: li r3, 320
; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 272
+; CHECK-NEXT: li r3, 304
; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 256
+; CHECK-NEXT: li r3, 288
; CHECK-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 240
+; CHECK-NEXT: li r3, 272
; CHECK-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 224
+; CHECK-NEXT: li r3, 256
; CHECK-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: li r3, 208
+; CHECK-NEXT: li r3, 240
; CHECK-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT: addi r1, r1, 688
+; CHECK-NEXT: addi r1, r1, 576
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
@@ -2858,516 +1996,410 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind {
; FAST-LABEL: lrint_v32i64_v32f16:
; FAST: # %bb.0:
; FAST-NEXT: mflr r0
-; FAST-NEXT: stdu r1, -480(r1)
-; FAST-NEXT: li r4, 128
-; FAST-NEXT: std r0, 496(r1)
-; FAST-NEXT: std r30, 320(r1) # 8-byte Folded Spill
+; FAST-NEXT: stdu r1, -560(r1)
+; FAST-NEXT: std r0, 576(r1)
+; FAST-NEXT: std r30, 544(r1) # 8-byte Folded Spill
; FAST-NEXT: mr r30, r3
-; FAST-NEXT: stfd f14, 336(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f15, 344(r1) # 8-byte Folded Spill
-; FAST-NEXT: fmr f14, f5
-; FAST-NEXT: stfd f16, 352(r1) # 8-byte Folded Spill
-; FAST-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 144
-; FAST-NEXT: fmr f16, f4
-; FAST-NEXT: stfd f17, 360(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f18, 368(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f19, 376(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f20, 384(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f21, 392(r1) # 8-byte Folded Spill
-; FAST-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 160
-; FAST-NEXT: stfd f22, 400(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f23, 408(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f24, 416(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f25, 424(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f26, 432(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f27, 440(r1) # 8-byte Folded Spill
-; FAST-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 176
-; FAST-NEXT: xxlor v22, f3, f3
-; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f29, 456(r1) # 8-byte Folded Spill
-; FAST-NEXT: fmr f29, f9
-; FAST-NEXT: stfd f30, 464(r1) # 8-byte Folded Spill
-; FAST-NEXT: stfd f31, 472(r1) # 8-byte Folded Spill
-; FAST-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 192
-; FAST-NEXT: xxlor v23, f2, f2
-; FAST-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 208
-; FAST-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 224
-; FAST-NEXT: xxlor v25, f13, f13
-; FAST-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 240
-; FAST-NEXT: xxlor v26, f12, f12
-; FAST-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 256
-; FAST-NEXT: xxlor v27, f11, f11
-; FAST-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 272
-; FAST-NEXT: xxlor v28, f10, f10
-; FAST-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 288
-; FAST-NEXT: xxlor v29, f8, f8
-; FAST-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 304
-; FAST-NEXT: xxlor v30, f7, f7
-; FAST-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT: li r4, 44
-; FAST-NEXT: xxlor v31, f6, f6
-; FAST-NEXT: stxsspx f1, r1, r4 # 4-byte Folded Spill
-; FAST-NEXT: lfs f1, 768(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: lhz r3, 848(r1)
+; FAST-NEXT: li r11, 224
+; FAST-NEXT: std r14, 416(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r15, 424(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r15, 736(r1)
+; FAST-NEXT: stxvd2x v20, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 240
+; FAST-NEXT: std r19, 456(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r14, 728(r1)
+; FAST-NEXT: std r3, 184(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 840(r1)
+; FAST-NEXT: lhz r19, 656(r1)
+; FAST-NEXT: stxvd2x v21, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 256
+; FAST-NEXT: std r21, 472(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r21, 664(r1)
+; FAST-NEXT: stxvd2x v22, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: std r3, 176(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 832(r1)
+; FAST-NEXT: li r11, 272
+; FAST-NEXT: std r23, 488(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r23, 672(r1)
+; FAST-NEXT: stxvd2x v23, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 288
+; FAST-NEXT: std r24, 496(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r26, 512(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r27, 520(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r27, 696(r1)
+; FAST-NEXT: lhz r26, 688(r1)
+; FAST-NEXT: lhz r24, 680(r1)
+; FAST-NEXT: stxvd2x v24, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: std r3, 152(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 824(r1)
+; FAST-NEXT: li r11, 304
+; FAST-NEXT: std r28, 528(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r28, 704(r1)
+; FAST-NEXT: stxvd2x v25, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: std r3, 144(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 816(r1)
+; FAST-NEXT: li r11, 320
+; FAST-NEXT: std r29, 536(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r29, 712(r1)
+; FAST-NEXT: stxvd2x v26, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 336
+; FAST-NEXT: std r31, 552(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r31, 720(r1)
+; FAST-NEXT: std r16, 432(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r17, 440(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r17, r6
+; FAST-NEXT: mr r16, r5
+; FAST-NEXT: stxvd2x v27, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: std r3, 136(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 808(r1)
+; FAST-NEXT: li r11, 352
+; FAST-NEXT: std r18, 448(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r20, 464(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r20, r8
+; FAST-NEXT: mr r18, r7
+; FAST-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: std r3, 104(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 800(r1)
+; FAST-NEXT: li r11, 368
+; FAST-NEXT: std r22, 480(r1) # 8-byte Folded Spill
+; FAST-NEXT: std r25, 504(r1) # 8-byte Folded Spill
+; FAST-NEXT: mr r25, r10
+; FAST-NEXT: mr r22, r9
+; FAST-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 384
+; FAST-NEXT: std r3, 96(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 792(r1)
+; FAST-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: li r11, 400
+; FAST-NEXT: std r3, 88(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 784(r1)
+; FAST-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT: std r3, 80(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 776(r1)
+; FAST-NEXT: std r3, 72(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 768(r1)
+; FAST-NEXT: std r3, 64(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 760(r1)
+; FAST-NEXT: std r3, 56(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 752(r1)
+; FAST-NEXT: std r3, 48(r1) # 8-byte Folded Spill
+; FAST-NEXT: lhz r3, 744(r1)
+; FAST-NEXT: std r3, 40(r1) # 8-byte Folded Spill
+; FAST-NEXT: clrldi r3, r4, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 120
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 760(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: clrldi r3, r16, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 112
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 752(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 208
+; FAST-NEXT: xxmrghd vs0, vs0, v31
+; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: clrldi r3, r17, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 104
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 744(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: clrldi r3, r18, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 96
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 736(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 192
+; FAST-NEXT: xxmrghd vs0, vs0, v31
+; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: clrldi r3, r20, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 88
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 728(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: clrldi r3, r22, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 80
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 720(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 160
+; FAST-NEXT: xxmrghd vs0, vs0, v31
+; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: mr r3, r19
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 72
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 712(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: clrldi r3, r25, 48
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 64
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 704(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: li r3, 112
+; FAST-NEXT: xxmrghd vs0, v31, vs0
+; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT: mr r3, r21
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 56
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 696(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: mr r3, r23
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 48
-; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT: lfs f1, 688(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: mr r3, r24
+; FAST-NEXT: xxmrghd v27, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: xxlor v21, f1, f1
-; FAST-NEXT: lfs f1, 680(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: mr r3, r26
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: xxlor v20, f1, f1
-; FAST-NEXT: lfs f1, 672(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: mr r3, r27
+; FAST-NEXT: xxmrghd v26, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: xxlor v24, f1, f1
-; FAST-NEXT: lfs f1, 664(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: mr r3, r28
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f31, f1
-; FAST-NEXT: lfs f1, 656(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: mr r3, r29
+; FAST-NEXT: xxmrghd v25, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f30, f1
-; FAST-NEXT: lfs f1, 648(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: mr r3, r31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f28, f1
-; FAST-NEXT: lfs f1, 640(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: mr r3, r14
+; FAST-NEXT: xxmrghd v24, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f27, f1
-; FAST-NEXT: lfs f1, 632(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: mr r3, r15
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f26, f1
-; FAST-NEXT: lfs f1, 624(r1)
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: ld r3, 40(r1) # 8-byte Folded Reload
+; FAST-NEXT: xxmrghd v23, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f25, f1
-; FAST-NEXT: xxlor f1, v25, v25
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: ld r3, 48(r1) # 8-byte Folded Reload
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f24, f1
-; FAST-NEXT: xxlor f1, v26, v26
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: ld r3, 56(r1) # 8-byte Folded Reload
+; FAST-NEXT: xxmrghd v22, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f23, f1
-; FAST-NEXT: xxlor f1, v27, v27
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: ld r3, 64(r1) # 8-byte Folded Reload
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f22, f1
-; FAST-NEXT: xxlor f1, v28, v28
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: ld r3, 72(r1) # 8-byte Folded Reload
+; FAST-NEXT: xxmrghd v21, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f21, f1
-; FAST-NEXT: fmr f1, f29
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: ld r3, 80(r1) # 8-byte Folded Reload
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f20, f1
-; FAST-NEXT: xxlor f1, v29, v29
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: ld r3, 88(r1) # 8-byte Folded Reload
+; FAST-NEXT: xxmrghd v20, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f19, f1
-; FAST-NEXT: xxlor f1, v30, v30
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v31, r3
+; FAST-NEXT: ld r3, 96(r1) # 8-byte Folded Reload
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f18, f1
-; FAST-NEXT: xxlor f1, v31, v31
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: ld r3, 104(r1) # 8-byte Folded Reload
+; FAST-NEXT: xxmrghd v31, vs0, v31
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f29, f1
-; FAST-NEXT: fmr f1, f14
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v30, r3
+; FAST-NEXT: ld r3, 136(r1) # 8-byte Folded Reload
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f14, f1
-; FAST-NEXT: fmr f1, f16
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: ld r3, 144(r1) # 8-byte Folded Reload
+; FAST-NEXT: xxmrghd v30, vs0, v30
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f16, f1
-; FAST-NEXT: xxlor f1, v22, v22
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v29, r3
+; FAST-NEXT: ld r3, 152(r1) # 8-byte Folded Reload
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fmr f17, f1
-; FAST-NEXT: xxlor f1, v23, v23
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: ld r3, 176(r1) # 8-byte Folded Reload
+; FAST-NEXT: xxmrghd v29, vs0, v29
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: li r3, 44
-; FAST-NEXT: fmr f15, f1
-; FAST-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload
-; FAST-NEXT: bl __truncsfhf2
-; FAST-NEXT: nop
-; FAST-NEXT: clrldi r3, r3, 48
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtvsrd v28, r3
+; FAST-NEXT: ld r3, 184(r1) # 8-byte Folded Reload
; FAST-NEXT: bl __extendhfsf2
; FAST-NEXT: nop
-; FAST-NEXT: fctid f3, f15
-; FAST-NEXT: fctid f4, f17
-; FAST-NEXT: mffprd r3, f3
-; FAST-NEXT: fctid f5, f16
-; FAST-NEXT: fctid f6, f14
-; FAST-NEXT: fctid f7, f18
-; FAST-NEXT: fctid f8, f19
-; FAST-NEXT: fctid f13, f1
-; FAST-NEXT: fctid f9, f20
-; FAST-NEXT: fctid f10, f22
-; FAST-NEXT: fctid f11, f24
-; FAST-NEXT: fctid f12, f25
-; FAST-NEXT: fctid f2, f23
-; FAST-NEXT: fctid f0, f21
-; FAST-NEXT: mtvsrd v2, r3
-; FAST-NEXT: mffprd r3, f4
-; FAST-NEXT: mtvsrd v3, r3
-; FAST-NEXT: mffprd r3, f5
-; FAST-NEXT: mtfprd f5, r3
-; FAST-NEXT: mffprd r3, f6
-; FAST-NEXT: mtfprd f1, r3
-; FAST-NEXT: mffprd r3, f7
-; FAST-NEXT: mtfprd f6, r3
-; FAST-NEXT: mffprd r3, f8
-; FAST-NEXT: mtfprd f7, r3
-; FAST-NEXT: mffprd r3, f9
-; FAST-NEXT: mtfprd f3, r3
-; FAST-NEXT: mffprd r3, f10
-; FAST-NEXT: mtfprd f4, r3
-; FAST-NEXT: mffprd r3, f11
-; FAST-NEXT: fctid f11, f31
-; FAST-NEXT: lfd f31, 56(r1) # 8-byte Folded Reload
-; FAST-NEXT: mtfprd f8, r3
-; FAST-NEXT: mffprd r3, f12
-; FAST-NEXT: xxlor f12, v24, v24
-; FAST-NEXT: fctid f31, f31
-; FAST-NEXT: fctid f12, f12
-; FAST-NEXT: mtfprd f9, r3
-; FAST-NEXT: mffprd r3, f13
-; FAST-NEXT: lfd f13, 48(r1) # 8-byte Folded Reload
-; FAST-NEXT: mtfprd f10, r3
-; FAST-NEXT: fctid f13, f13
-; FAST-NEXT: xxmrghd v3, vs5, v3
-; FAST-NEXT: fctid f5, f26
-; FAST-NEXT: mffprd r3, f5
-; FAST-NEXT: mtfprd f5, r3
-; FAST-NEXT: xxmrghd v4, vs7, vs6
-; FAST-NEXT: fctid f6, f27
-; FAST-NEXT: fctid f7, f28
-; FAST-NEXT: mffprd r3, f6
-; FAST-NEXT: lfd f28, 96(r1) # 8-byte Folded Reload
-; FAST-NEXT: fctid f28, f28
-; FAST-NEXT: mtfprd f6, r3
-; FAST-NEXT: mffprd r3, f7
-; FAST-NEXT: mtfprd f7, r3
-; FAST-NEXT: xxmrghd v2, v2, vs10
-; FAST-NEXT: fctid f10, f30
-; FAST-NEXT: mffprd r3, f10
-; FAST-NEXT: lfd f30, 80(r1) # 8-byte Folded Reload
-; FAST-NEXT: fctid f30, f30
-; FAST-NEXT: mtfprd f10, r3
-; FAST-NEXT: mffprd r3, f11
-; FAST-NEXT: mtfprd f11, r3
-; FAST-NEXT: mffprd r3, f12
-; FAST-NEXT: mtfprd f12, r3
-; FAST-NEXT: xxmrghd v5, vs12, vs11
-; FAST-NEXT: xxlor f11, v20, v20
-; FAST-NEXT: xxlor f12, v21, v21
-; FAST-NEXT: fctid f11, f11
-; FAST-NEXT: fctid f12, f12
-; FAST-NEXT: mffprd r3, f11
-; FAST-NEXT: mtfprd f11, r3
-; FAST-NEXT: mffprd r3, f12
-; FAST-NEXT: mtfprd f12, r3
-; FAST-NEXT: mffprd r3, f13
-; FAST-NEXT: mtfprd f13, r3
-; FAST-NEXT: mffprd r3, f31
-; FAST-NEXT: lfd f31, 64(r1) # 8-byte Folded Reload
-; FAST-NEXT: fctid f31, f31
-; FAST-NEXT: mtvsrd v0, r3
-; FAST-NEXT: mffprd r3, f31
-; FAST-NEXT: lfd f31, 72(r1) # 8-byte Folded Reload
-; FAST-NEXT: mtvsrd v1, r3
-; FAST-NEXT: mffprd r3, f30
-; FAST-NEXT: lfd f30, 88(r1) # 8-byte Folded Reload
-; FAST-NEXT: fctid f31, f31
-; FAST-NEXT: mtvsrd v6, r3
-; FAST-NEXT: mffprd r3, f28
-; FAST-NEXT: lfd f28, 104(r1) # 8-byte Folded Reload
-; FAST-NEXT: fctid f30, f30
-; FAST-NEXT: fctid f28, f28
-; FAST-NEXT: mtvsrd v7, r3
-; FAST-NEXT: mffprd r3, f28
-; FAST-NEXT: lfd f28, 112(r1) # 8-byte Folded Reload
-; FAST-NEXT: fctid f28, f28
-; FAST-NEXT: mtvsrd v8, r3
-; FAST-NEXT: mffprd r3, f28
-; FAST-NEXT: lfd f28, 120(r1) # 8-byte Folded Reload
-; FAST-NEXT: fctid f28, f28
-; FAST-NEXT: xxmrghd v10, vs12, vs11
-; FAST-NEXT: xxmrghd v0, v0, vs13
-; FAST-NEXT: xxswapd vs12, v0
-; FAST-NEXT: xxmrghd v0, vs9, vs8
-; FAST-NEXT: xxmrghd v7, v8, v7
-; FAST-NEXT: mtvsrd v8, r3
-; FAST-NEXT: mffprd r3, f28
-; FAST-NEXT: mtvsrd v9, r3
-; FAST-NEXT: mffprd r3, f30
-; FAST-NEXT: xxswapd v7, v7
-; FAST-NEXT: xxmrghd v8, v9, v8
-; FAST-NEXT: mtvsrd v9, r3
-; FAST-NEXT: mffprd r3, f31
-; FAST-NEXT: xxswapd v8, v8
-; FAST-NEXT: xxmrghd v6, v9, v6
-; FAST-NEXT: mtvsrd v9, r3
+; FAST-NEXT: fctid f0, f1
+; FAST-NEXT: xxswapd vs1, v29
+; FAST-NEXT: li r4, 112
+; FAST-NEXT: xxswapd vs2, v30
+; FAST-NEXT: xxswapd vs3, v25
+; FAST-NEXT: mffprd r3, f0
+; FAST-NEXT: mtfprd f0, r3
; FAST-NEXT: li r3, 240
-; FAST-NEXT: stxvd2x v8, r30, r3
+; FAST-NEXT: xxmrghd v2, vs0, v28
+; FAST-NEXT: xxswapd vs0, v2
+; FAST-NEXT: stxvd2x vs0, r30, r3
; FAST-NEXT: li r3, 224
-; FAST-NEXT: stxvd2x v7, r30, r3
+; FAST-NEXT: stxvd2x vs1, r30, r3
; FAST-NEXT: li r3, 208
-; FAST-NEXT: xxswapd vs11, v6
-; FAST-NEXT: xxmrghd v6, vs10, vs7
-; FAST-NEXT: stxvd2x vs11, r30, r3
+; FAST-NEXT: stxvd2x vs2, r30, r3
; FAST-NEXT: li r3, 192
-; FAST-NEXT: xxmrghd v1, v9, v1
-; FAST-NEXT: xxswapd vs11, v1
-; FAST-NEXT: xxmrghd v1, vs6, vs5
-; FAST-NEXT: xxswapd vs5, v10
-; FAST-NEXT: xxswapd vs6, v5
-; FAST-NEXT: stxvd2x vs11, r30, r3
+; FAST-NEXT: xxswapd vs0, v31
+; FAST-NEXT: stxvd2x vs0, r30, r3
; FAST-NEXT: li r3, 176
-; FAST-NEXT: stxvd2x vs12, r30, r3
+; FAST-NEXT: xxswapd vs1, v20
+; FAST-NEXT: stxvd2x vs1, r30, r3
; FAST-NEXT: li r3, 160
-; FAST-NEXT: stxvd2x vs5, r30, r3
+; FAST-NEXT: xxswapd vs2, v23
+; FAST-NEXT: xxswapd vs0, v21
+; FAST-NEXT: stxvd2x vs0, r30, r3
; FAST-NEXT: li r3, 144
-; FAST-NEXT: stxvd2x vs6, r30, r3
-; FAST-NEXT: mffprd r3, f2
-; FAST-NEXT: mtfprd f7, r3
+; FAST-NEXT: xxswapd vs1, v22
+; FAST-NEXT: stxvd2x vs1, r30, r3
; FAST-NEXT: li r3, 128
-; FAST-NEXT: xxswapd vs5, v6
-; FAST-NEXT: stxvd2x vs5, r30, r3
-; FAST-NEXT: li r3, 112
-; FAST-NEXT: xxswapd vs2, v1
-; FAST-NEXT: xxswapd vs6, v0
; FAST-NEXT: stxvd2x vs2, r30, r3
+; FAST-NEXT: li r3, 112
+; FAST-NEXT: xxswapd vs0, v24
+; FAST-NEXT: stxvd2x vs0, r30, r3
; FAST-NEXT: li r3, 96
-; FAST-NEXT: fctid f2, f29
-; FAST-NEXT: stxvd2x vs6, r30, r3
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: mtfprd f0, r3
-; FAST-NEXT: mffprd r3, f2
-; FAST-NEXT: mtfprd f2, r3
+; FAST-NEXT: stxvd2x vs3, r30, r3
; FAST-NEXT: li r3, 80
-; FAST-NEXT: xxmrghd v5, vs7, vs4
-; FAST-NEXT: xxswapd vs4, v2
-; FAST-NEXT: xxmrghd v0, vs0, vs3
-; FAST-NEXT: xxswapd vs0, v5
-; FAST-NEXT: xxswapd vs3, v3
-; FAST-NEXT: stxvd2x vs0, r30, r3
+; FAST-NEXT: lxvd2x vs2, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT: li r4, 160
+; FAST-NEXT: xxswapd vs1, v26
+; FAST-NEXT: stxvd2x vs1, r30, r3
; FAST-NEXT: li r3, 64
-; FAST-NEXT: xxswapd vs0, v0
+; FAST-NEXT: lxvd2x vs1, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT: li r4, 192
+; FAST-NEXT: lxvd2x vs3, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT: li r4, 208
+; FAST-NEXT: lxvd2x vs4, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT: xxswapd vs0, v27
; FAST-NEXT: stxvd2x vs0, r30, r3
; FAST-NEXT: li r3, 48
-; FAST-NEXT: xxmrghd v5, vs2, vs1
-; FAST-NEXT: xxswapd vs1, v4
-; FAST-NEXT: stxvd2x vs1, r30, r3
-; FAST-NEXT: li r3, 32
-; FAST-NEXT: xxswapd vs2, v5
+; FAST-NEXT: xxswapd vs2, vs2
; FAST-NEXT: stxvd2x vs2, r30, r3
+; FAST-NEXT: li r3, 32
+; FAST-NEXT: xxswapd vs1, vs1
+; FAST-NEXT: stxvd2x vs1, r30, r3
; FAST-NEXT: li r3, 16
+; FAST-NEXT: xxswapd vs3, vs3
; FAST-NEXT: stxvd2x vs3, r30, r3
-; FAST-NEXT: li r3, 304
+; FAST-NEXT: li r3, 400
+; FAST-NEXT: xxswapd vs4, vs4
; FAST-NEXT: stxvd2x vs4, 0, r30
-; FAST-NEXT: lfd f31, 472(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f30, 464(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f29, 456(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f28, 448(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f27, 440(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f26, 432(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f25, 424(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f24, 416(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f23, 408(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f22, 400(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f21, 392(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f20, 384(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f19, 376(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f18, 368(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f17, 360(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f16, 352(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f15, 344(r1) # 8-byte Folded Reload
-; FAST-NEXT: lfd f14, 336(r1) # 8-byte Folded Reload
; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 288
-; FAST-NEXT: ld r30, 320(r1) # 8-byte Folded Reload
+; FAST-NEXT: li r3, 384
+; FAST-NEXT: ld r31, 552(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r30, 544(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r29, 536(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r28, 528(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r27, 520(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r26, 512(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r25, 504(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r24, 496(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r23, 488(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r22, 480(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r21, 472(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r20, 464(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r19, 456(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r18, 448(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r17, 440(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r16, 432(r1) # 8-byte Folded Reload
; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 272
+; FAST-NEXT: li r3, 368
+; FAST-NEXT: ld r15, 424(r1) # 8-byte Folded Reload
+; FAST-NEXT: ld r14, 416(r1) # 8-byte Folded Reload
; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 256
+; FAST-NEXT: li r3, 352
; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 240
+; FAST-NEXT: li r3, 336
; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 224
+; FAST-NEXT: li r3, 320
; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 208
+; FAST-NEXT: li r3, 304
; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 192
+; FAST-NEXT: li r3, 288
; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 176
+; FAST-NEXT: li r3, 272
; FAST-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 160
+; FAST-NEXT: li r3, 256
; FAST-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 144
+; FAST-NEXT: li r3, 240
; FAST-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: li r3, 128
+; FAST-NEXT: li r3, 224
; FAST-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT: addi r1, r1, 480
+; FAST-NEXT: addi r1, r1, 560
; FAST-NEXT: ld r0, 16(r1)
; FAST-NEXT: mtlr r0
; FAST-NEXT: blr
More information about the llvm-commits
mailing list